Commit 48a6ab3c authored by 李康's avatar 李康

support scraping of rank list

parent 770f50fd
import cv2
import uiautomator2 as u2
import numpy as np
import sys, os, time
# Separator placed between the fields of each record that saveProduct prints
# and writes to the output file.
delim = "; "
# Not referenced anywhere in the visible part of this file.
filter_scroll_steps = 2
# Directory that receives products.csv and the per-product .jpg images.
output_dir = "list"
def cropImg(img, out, bounds):
    """Crop an image to the inside of ``bounds`` and save it to ``out``.

    Args:
        img: Image array indexed as ``img[y0:y1, x0:x1]``.
        out: Destination file path handed to ``cv2.imwrite``.
        bounds: Mapping with ``'top'``, ``'bottom'``, ``'left'`` and
            ``'right'`` entries that ``int()`` can convert.

    Returns:
        None. A failed write is reported on stdout instead of being
        silently ignored.
    """
    # Margin trimmed from every edge of the bounds. The two terms are kept
    # as written; what each one stands for is not documented in this file.
    delta = 20 + 28
    top = int(bounds['top']) + delta
    bottom = int(bounds['bottom']) - delta
    left = int(bounds['left']) + delta
    right = int(bounds['right']) - delta
    cropped = img[top:bottom, left:right]  # crop coordinates are [y0:y1, x0:x1]
    # cv2.imwrite reports failure (e.g. a missing target directory) through
    # its return value rather than by raising, so it has to be checked.
    if not cv2.imwrite(out, cropped):
        print("Failed to write image %s" % out)
def getProductImg(d, path, pos):
    """Screenshot the device and save the region described by ``pos``.

    Args:
        d: Device object providing ``screenshot(format='opencv')``.
        path: Output image path, forwarded to ``cropImg``.
        pos: Bounds mapping, forwarded to ``cropImg``.
    """
    cropImg(d.screenshot(format='opencv'), path, pos)
def getProductKey(product, names = None):
    """Derive the lookup key of a product row from its name views.

    Args:
        product: UI object of one product row; its ``tv_name`` and
            ``tv_name_en`` children are queried.
        names: Optional list. When given, the ``tv_name`` text and then the
            ``tv_name_en`` text are appended to it ("" for a missing view).

    Returns:
        A ``(key, keyobj)`` tuple. ``key`` is the ``tv_name`` text unless it
        is "", in which case it is the ``tv_name_en`` text. ``keyobj`` is the
        first of the two views that exists, or None when neither does.
    """
    anchor = None

    name_sel = product.child(resourceId="org.c2h4.afei.beauty:id/tv_name")
    if name_sel.count != 0:
        anchor = name_sel
        name_text = name_sel.get_text()
    else:
        name_text = ""

    name_en_sel = product.child(resourceId="org.c2h4.afei.beauty:id/tv_name_en")
    if name_en_sel.count != 0:
        if anchor is None:
            anchor = name_en_sel
        name_en_text = name_en_sel.get_text()
    else:
        name_en_text = ""

    # Fall back to the second name only when the first one is empty.
    key = name_text if name_text != "" else name_en_text

    if names is not None:
        for text in (name_text, name_en_text):
            names.append(text)
    return key, anchor
def getProductDetailBasic(product, done):
    """Collect the fields of a product that are readable from the list row.

    Args:
        product: UI object of one product row.
        done: Collection of keys that were already handled.

    Returns:
        A ``(key, keyobj, info)`` tuple. ``info`` is None when the key is
        empty or already in ``done``; otherwise it is the list
        ``[name, name_en, rate_score, asess_num, address]`` with "" standing
        in for every view that is not present.
    """
    names = []
    key, keyobj = getProductKey(product, names)
    if key == "" or key in done:
        return key, keyobj, None

    # The remaining fields are siblings of the name view, read in this order.
    sibling_ids = (
        "org.c2h4.afei.beauty:id/rate_score",
        "org.c2h4.afei.beauty:id/tv_asess_num",
        "org.c2h4.afei.beauty:id/tv_address",
    )
    extras = []
    for view_id in sibling_ids:
        view = keyobj.sibling(resourceId=view_id)
        extras.append(view.get_text() if view.count != 0 else "")
    return key, keyobj, names + extras
def getProductDetail(d, product, done):
    """Open a product's detail page and collect its full record.

    The product's name view is clicked, the detail image is saved under
    ``output_dir``, the effect labels are read, and the app is sent back to
    the list. The key is appended to ``done`` on success.

    Args:
        d: Connected device object.
        product: UI object of one product row in the list.
        done: List of keys already handled; extended in place.

    Returns:
        ``[name, name_en, rate_score, asess_num, address, effects]``, or
        None when the product has no key or was already handled.

    Raises:
        RuntimeError: The detail image did not appear after 300 polls of
            0.2 s each.
    """
    key, keyobj, basicinfo = getProductDetailBasic(product, done)
    if basicinfo is None:
        return None

    keyobj.click()
    img = d(resourceId="org.c2h4.afei.beauty:id/iv_image")
    wait_cnt = 0
    while img.count == 0:
        time.sleep(0.2)
        wait_cnt += 1
        if wait_cnt == 300:
            raise RuntimeError("No response from APP")
        img = d(resourceId="org.c2h4.afei.beauty:id/iv_image")
    # wait half second for the image to be stable to take screenshot,
    # otherwise possibly the image will be different somehow
    time.sleep(0.5)

    # A product name containing a path separator would point into a
    # sub-directory that does not exist and the image would never be
    # written, so separators are replaced in the file name only. The key
    # stored in `done` and in the record stays untouched.
    file_stem = key.replace("/", "_").replace("\\", "_")
    path = output_dir + '/' + file_stem + ".jpg"
    getProductImg(d, path, img.info['bounds'])

    effect_views = d(resourceId="org.c2h4.afei.beauty:id/rl_effect").child(className="android.widget.TextView")
    effect_texts = []
    for effect in effect_views:
        # Read the text once per view instead of querying the device twice.
        text = effect.get_text()
        if not text.startswith("功效"):
            # Labels starting with "功效" are skipped; the rest are kept.
            effect_texts.append(text)
    effects = " ".join(effect_texts)

    d.press("back")
    time.sleep(1)
    done.append(key)
    basicinfo.append(effects)
    return basicinfo
def saveProduct(f, res, product_type):
    """Print one product record and append it to the output file.

    Args:
        f: Writable text file object.
        res: List of string fields; ``product_type`` is appended to it in
            place before the record is formatted.
        product_type: Label added as the last field of the record.
    """
    res.append(product_type)
    line = delim.join(res)
    print(line)
    f.write(line + "\n")
def getAllProductUiObj(d):
    """Return the ``RelativeLayout`` children of the ``rv_container`` view.

    Args:
        d: Device object; called with a ``resourceId`` selector.
    """
    container = d(resourceId="org.c2h4.afei.beauty:id/rv_container")
    return container.child(className="android.widget.RelativeLayout")
def getAllProducts(d, f, product_type):
    """Scroll through the product list and save every product found.

    Each pass picks the first row whose top edge is not clipped (its
    ``bounds`` top equals its ``visibleBounds`` top), scrapes it, then
    scrolls the list up a little. The loop ends after 100 products or after
    a run of consecutive passes that yield nothing new; the rows still on
    screen are then scraped as well.

    Args:
        d: Connected device object.
        f: Writable text file object receiving the records.
        product_type: Label stored as the last field of every record.

    Returns:
        Number of products scraped.
    """
    done = []
    nocnt = 0
    scraped_cnt = 0
    scroll_scale = 0.1
    # Consecutive fruitless passes tolerated before the loop gives up.
    max_misses = int(1 / scroll_scale) + 1
    while True:
        product = None
        for candidate in getAllProductUiObj(d):
            # Fetch the info once so both comparisons use the same snapshot.
            info = candidate.info
            if info['bounds']['top'] == info['visibleBounds']['top']:
                product = candidate
                break

        # No top-aligned row (or an empty list) used to pass None on and
        # crash inside getProductDetail; it is now counted as a miss.
        res = None if product is None else getProductDetail(d, product, done)
        if res is not None:
            saveProduct(f, res, product_type)
            scraped_cnt += 1
            if scraped_cnt % 10 == 0:
                f.flush()
            if scraped_cnt == 100:
                return scraped_cnt
            nocnt = 0
        else:
            nocnt += 1
            if nocnt == max_misses:
                break
        d.swipe_ext("up", scale=scroll_scale)

    # handle the last few products in the list
    # NOTE(review): index 0 is skipped as in the original, presumably because
    # the loop above already covered it. Confirm.
    products = getAllProductUiObj(d)
    for idx in range(1, products.count):
        res = getProductDetail(d, products[idx], done)
        if res is not None:
            saveProduct(f, res, product_type)
            scraped_cnt += 1
    return scraped_cnt
# Labels that getProductByType can tap; the integer given in argv[1] is used
# as an index into this list.
tap_list = ["洁面", "护肤水", "精华", "乳液·面霜", "防晒", "眼部", "清洁面膜", "面膜", "唇部", "身体"]
# Alternative label set kept for reference (presumably a second group of
# categories in the same app; confirm before swapping it in):
# tap_list = ["卸妆", "妆前", "粉底", "口红", "眉部", "眼影", "眼线", "睫毛", "腮红", "定妆", "遮瑕·修容", "香水"]
def getProductByType(argv):
    """Scrape all products listed under one entry of ``tap_list``.

    Connects to the device, taps the label selected by ``argv[1]``, scrapes
    the resulting list into ``<output_dir>/products.csv`` (opened in append
    mode) and prints a summary.

    Args:
        argv: Command-line style argument list; ``argv[1]`` is the integer
            index into ``tap_list``.

    Raises:
        IndexError: ``argv`` has no second element or the index is outside
            ``tap_list``.
        ValueError: ``argv[1]`` is not an integer.
    """
    target_idx = int(argv[1])
    d = u2.connect_usb('d52196830204')
    print(d.app_current())
    # The CSV and the product images are written here; create the directory
    # up front so a fresh checkout does not fail on the first write.
    os.makedirs(output_dir, exist_ok=True)
    d.freeze_rotation()
    try:
        with open("%s/products.csv" % output_dir, 'a+') as f:
            tap = tap_list[target_idx]
            tap_ui_obj = d(text=tap)
            if tap_ui_obj.count == 1:
                d(text=tap).click()
            else:
                print("Tap %s (%d) is not found!" % (tap, tap_ui_obj.count))
                return
            time.sleep(1)
            temp_cnt = getAllProducts(d, f, tap)
            f.flush()
            print("--------------------- tap summury ---------------------")
            print("%s products are scraped from %s" % (temp_cnt, tap))
            print("-------------------------------------------------------------")
    finally:
        # Always release the rotation lock, including on the early return
        # above and when scraping raises.
        d.freeze_rotation(False)
# Script entry point: the whole argument vector is handed over unchanged.
if __name__ == "__main__":
    getProductByType(sys.argv)
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment