Commit f2bd2fee authored by 李康

Add script for product scraping

parent b92b2705
import cv2
import uiautomator2 as u2
import numpy as np
import sys, os, time
# Field separator placed between the values of one product record when the
# record is printed and written to the output file.
delim = "; "
def cropImg(img, out, bounds, delta=20 + 28):
    """Crop ``img`` to ``bounds`` shrunk by a margin and write it to ``out``.

    Args:
        img: Image array indexed as ``img[y, x]``.
        out: Destination path handed to ``cv2.imwrite``.
        bounds: Mapping with ``'left'``, ``'top'``, ``'right'`` and
            ``'bottom'`` coordinates; each value is converted with ``int``.
        delta: Margin in pixels trimmed from every side of ``bounds``.
            Defaults to the previously hard-coded ``20 + 28``.

    Returns:
        None. When the shrunken region is empty, or the image cannot be
        written, a message is printed and nothing else happens.
    """
    top = int(bounds['top']) + delta
    bottom = int(bounds['bottom']) - delta
    left = int(bounds['left']) + delta
    right = int(bounds['right']) - delta
    # Crop coordinates are [y0:y1, x0:x1] (rows first, then columns).
    cropped = img[top:bottom, left:right]
    if cropped.size == 0:
        # Bounds smaller than twice the margin leave nothing to save; writing
        # an empty image would abort the whole run, so skip it instead.
        print("skip empty crop: {}".format(out))
        return
    if not cv2.imwrite(out, cropped):
        # imwrite reports failure through its return value only.
        print("failed to write image: {}".format(out))
def getProductDetail(d, brand_name, product, done):
    """Open one product from the list, scrape its details and save its image.

    Args:
        d: Device object used to query the detail page and to navigate back.
        brand_name: Directory prefix of the saved product image.
        product: UI object of the product's list entry, or None.
        done: Keys of products already scraped. The key of a newly scraped
            product is appended to it.

    Returns:
        ``[name, name_en, rate_score, asess_num, address, effects]`` (each a
        string, "" when the widget is missing), or None when ``product`` is
        None, has no usable name, or its key is already in ``done``.
    """
    if product is None:
        # The caller may find no suitable list entry; treat that as a miss.
        return None

    prefix = "org.c2h4.afei.beauty:id/"

    # The first name widget that exists doubles as the anchor ("keyobj") for
    # sibling lookups and as the click target.
    keyobj = None
    name = ""
    name_node = product.child(resourceId=prefix + "tv_name")
    if name_node.count != 0:
        keyobj = name_node
        name = name_node.get_text()
    name_en = ""
    name_en_node = product.child(resourceId=prefix + "tv_name_en")
    if name_en_node.count != 0:
        if keyobj is None:
            keyobj = name_en_node
        name_en = name_en_node.get_text()

    # Prefer the primary name as the de-duplication key, else the English one.
    key = name if name != "" else name_en
    if key == "" or key in done:
        return None

    rate_score = _siblingText(keyobj, prefix + "rate_score")
    asess_num = _siblingText(keyobj, prefix + "tv_asess_num")
    address = _siblingText(keyobj, prefix + "tv_address")

    keyobj.click()
    # Poll until the detail page's image widget shows up.
    # NOTE(review): there is no timeout here; a click that does not open the
    # detail page makes this loop spin forever.
    img = d(resourceId=prefix + "iv_image")
    while img.count == 0:
        time.sleep(0.1)
        img = d(resourceId=prefix + "iv_image")
    time.sleep(0.5)

    # Path separators inside a product name would point into a directory that
    # does not exist, so they are replaced in the file name only.
    safe_key = key.replace('/', '_').replace('\\', '_')
    path = brand_name + '/' + safe_key + ".jpg"
    getProductImg(d, path, img.info['bounds'])

    effect_nodes = d(resourceId=prefix + "rl_effect").child(
        className="android.widget.TextView")
    # Read each text once; entries starting with "功效" are left out
    # (presumably the section's heading label).
    texts = [node.get_text() for node in effect_nodes]
    effects = " ".join(text for text in texts if not text.startswith("功效"))

    d(resourceId=prefix + "iv_back").click()
    time.sleep(1)
    done.append(key)
    return [name, name_en, rate_score, asess_num, address, effects]


def _siblingText(anchor, resource_id):
    """Return the text of ``anchor``'s sibling ``resource_id``, or "" if absent."""
    node = anchor.sibling(resourceId=resource_id)
    if node.count == 0:
        return ""
    return node.get_text()
def getProductImg(d, path, pos):
    """Screenshot the device and save the region described by ``pos``.

    Args:
        d: Device object providing ``screenshot()``.
        path: Output file path passed on to ``cropImg``.
        pos: Bounds mapping passed on to ``cropImg``.
    """
    shot = np.array(d.screenshot())
    # Swap the channel order from RGB to BGR before cropping and saving.
    bgr = cv2.cvtColor(shot, cv2.COLOR_RGB2BGR)
    cropImg(bgr, path, pos)
def saveProduct(f, res, product_type):
    """Print one product record and append it as a line to ``f``.

    Args:
        f: Writable text file object.
        res: List of string fields; ``product_type`` is appended to it in
            place before the record is formatted.
        product_type: Category label stored as the record's last field.
    """
    res += [product_type]  # in-place extend: the caller's list grows too
    line = delim.join(res)
    print(line)
    f.write(line + "\n")
def getAllProducts(d, f, brand_name, product_type):
    """Scroll through the current product list and record every product.

    Each round picks the first list entry whose top edge is not clipped,
    scrapes it, and swipes the list up a little. Three consecutive rounds
    without a new product end the scrolling phase; the entries still on
    screen (from the second one on) are then scraped as well.

    Args:
        d: Device object used for UI queries and swipes.
        f: Writable text file object that receives the records.
        brand_name: Output directory prefix forwarded to ``getProductDetail``.
        product_type: Category label stored with every record.
    """
    container_id = "org.c2h4.afei.beauty:id/rv_container"
    item_class = "android.widget.RelativeLayout"

    done = []
    nocnt = 0  # consecutive rounds that produced no new product
    while True:
        products = d(resourceId=container_id).child(className=item_class)
        product = None
        for candidate in products:
            # Read the info once so both values come from the same snapshot.
            info = candidate.info
            if info['bounds']['top'] == info['visibleBounds']['top']:
                product = candidate
                break

        # With no unclipped entry on screen there is nothing to open; count
        # the round as a miss instead of passing None on.
        res = None
        if product is not None:
            res = getProductDetail(d, brand_name, product, done)

        if res is not None:
            saveProduct(f, res, product_type)
            nocnt = 0
        else:
            nocnt += 1
            if nocnt == 3:
                break
        d.swipe_ext("up", scale=0.1)

    # Scrolling has stalled: sweep the entries that are still visible.
    # Index 0 is skipped here, exactly as before.
    products = d(resourceId=container_id).child(className=item_class)
    for idx in range(1, products.count):
        res = getProductDetail(d, brand_name, products[idx], done)
        if res is not None:
            saveProduct(f, res, product_type)
def getProductByType(argv):
    """Scrape all products of one brand, one category filter at a time.

    Collects the category labels from the filter panel (before and after
    scrolling it), then selects each category in turn and records its
    products into ``<brand_name>/products.csv``.

    Args:
        argv: Command-line style list. ``argv[1]`` is the brand name, also
            used as the output directory. An optional ``argv[2]`` overrides
            the default device serial.

    Raises:
        SystemExit: With a usage message when the brand name is missing.
    """
    if len(argv) < 2:
        prog = argv[0] if argv else "script"
        raise SystemExit("usage: {} BRAND_NAME [DEVICE_SERIAL]".format(prog))
    brand_name = argv[1]
    serial = argv[2] if len(argv) > 2 else 'd52196830204'
    os.makedirs(brand_name, exist_ok=True)

    d = u2.connect_usb(serial)
    filter_id = "org.c2h4.afei.beauty:id/tv_filter"
    type_id = "org.c2h4.afei.beauty:id/tv_type"

    # The context manager closes the file even if scraping fails midway;
    # UTF-8 keeps non-ASCII product text writable whatever the locale is.
    with open(brand_name + "/products.csv", 'a+', encoding="utf-8") as f:
        d(resourceId=filter_id).click()

        # Gather the labels visible before and after scrolling the panel.
        ftype_names = []
        _collectTypeNames(d(resourceId=type_id), ftype_names)
        d(scrollable=True).scroll(steps=15)
        _collectTypeNames(d(resourceId=type_id), ftype_names)
        d(scrollable=True).scroll.vert.backward()

        for ftype_name in ftype_names:
            print(ftype_name)

        for ftype_name in ftype_names:
            ftype = d(resourceId=type_id, text=ftype_name)
            if ftype.count == 0:
                # Not on screen: scroll the panel once and look again.
                d(scrollable=True).scroll(steps=15)
                ftype = d(resourceId=type_id, text=ftype_name)
            ftype.click()
            time.sleep(1)
            getAllProducts(d, f, brand_name, ftype_name)
            # Reopen the filter panel for the next category.
            d(resourceId=filter_id).click()


def _collectTypeNames(ftypes, names):
    """Append each label of ``ftypes`` not yet in ``names``, in order."""
    for ftype in ftypes:
        text = ftype.get_text()
        # "全部品类" is excluded (presumably the catch-all "all categories"
        # entry rather than a real category).
        if text != "全部品类" and text not in names:
            names.append(text)
if __name__ == "__main__":
    # Script entry point: hand the raw command line to the scraper.
    getProductByType(sys.argv)
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment