1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
# -*- coding:utf-8 -*-
# @Time : 2020/5/29 10:11
# @Author : litao
from PIL import Image
import pytesseract,os,re
import cv2
import argparse
import cv2
import os
# Command-line interface: a single optional switch choosing the image
# preprocessing mode. The parsed options are kept as a plain dict in `args`.
ap = argparse.ArgumentParser()
ap.add_argument(
    "-p",
    "--preprocess",
    type=str,
    default="thresh",
    help="type of preprocessing to be done",
)
args = vars(ap.parse_args())
class Languages:
    """Language codes handed to pytesseract as its ``lang`` argument."""

    # Simplified Chinese model name.
    CHS = "chi_sim"
    # English model name.
    ENG = "eng"
def img_to_str(image_path, lang=Languages.CHS, crop_box=(490, 0, 560, 60)):
    """Crop a region out of an image file and OCR it with Tesseract.

    Args:
        image_path: Path of the image file to read.
        lang: Tesseract language code forwarded to pytesseract
            (defaults to ``Languages.CHS``).
        crop_box: ``(left, upper, right, lower)`` pixel box passed to
            ``Image.crop``. The default is the region this script was
            previously hard-coded to read.

    Returns:
        Whatever ``pytesseract.image_to_string`` returns for the cropped
        region (the recognized text).

    Side effects:
        Overwrites ``thumb.jpg`` in the current working directory with the
        cropped region so the OCR input can be inspected by eye.
    """
    # Use a context manager so the source file handle is released as soon as
    # the crop has been taken, instead of staying open until garbage collection.
    with Image.open(image_path) as img:
        thumb = img.crop(crop_box)
        # Make sure the cropped pixels are materialized before the file closes.
        thumb.load()

    # JPEG cannot store alpha or palette images (e.g. RGBA/P PNG screenshots),
    # so convert only the debug preview; OCR still runs on the untouched crop.
    if thumb.mode in ("1", "L", "RGB", "CMYK"):
        preview = thumb
    else:
        preview = thumb.convert("RGB")
    preview.save("thumb.jpg")

    # "--psm 7": page segmentation mode 7 (treat the image as one text line).
    # "digits": Tesseract's digits config file.
    # The single-dash "-psm" spelling is rejected by newer Tesseract releases.
    return pytesseract.image_to_string(thumb, lang=lang, config="--psm 7 digits")
def file_path_scan(file_path):
    """Yield ``(filename, play_count)`` for every regular file in a directory.

    Each file is OCR'd with ``img_to_str`` and the first run of digits in the
    recognized text is taken as the play count. The recognized text is also
    printed for every file.

    Args:
        file_path: Directory to scan. Only direct children are visited;
            anything that is not a regular file is skipped.

    Yields:
        Tuples ``(filename, play_count)`` where ``filename`` is the bare
        directory entry name and ``play_count`` is the matched digit string,
        or the integer ``0`` when the text contains no digits.
    """
    for filename in os.listdir(file_path):
        path = os.path.join(file_path, filename)
        if not os.path.isfile(path):
            continue
        title = img_to_str(path, lang=Languages.CHS)
        print(title)
        # Explicit "no match" check instead of a bare except around an index
        # lookup, so unrelated errors are no longer silently swallowed.
        match = re.search(r"\d+", title)
        play_count = match.group() if match else 0
        yield filename, play_count
# Directory holding the screenshots to OCR.
file_path = r'D:\work_file\word_file_new\litao\num'

for filename, play_count in file_path_scan(file_path):
    # Drop the ".png" marker, then insert colons after character 13 and
    # character 15 of what remains (e.g. "... 202632" -> "... 20:26:32").
    time_str = filename.replace(".png", "")
    time_str = ":".join((time_str[:13], time_str[13:15], time_str[15:]))
    print(time_str, play_count)

# Single-image example:
# print(img_to_str(r'D:\work_file\word_file_new\litao\screen\2020-04-16 202632.png', lang=Languages.CHS))