Commit b92b2705 authored by 李康

add brand script and the scraped data

parent 283e2486
__pycache__ __pycache__
.idea .idea
.DS_Store
from appium import webdriver
def get_desired_capabilities(device_name="Redmi Note 4X", udid="d52196830204"):
    """Build the desired-capabilities dict passed to ``webdriver.Remote``.

    Args:
        device_name: Value for the ``deviceName`` capability. Defaults to the
            device this script was originally hard-wired to.
        udid: Value for the ``udid`` capability (the device serial). Defaults
            to the originally hard-coded serial.

    Returns:
        A new dict of capabilities on every call, so callers may mutate it
        without affecting later calls.
    """
    return {
        "platformName": "Android",
        "deviceName": device_name,
        "udid": udid,
        "automationName": "uiautomator2",
        "appActivity": ".ui.LauncherUI",
        # The flag-like capabilities below are deliberately kept as the exact
        # string values the script has always sent (including the
        # capitalised "True").
        "noReset": "True",
        "waitForQuiescence": "false",
        "useJSONSource": "true",
        "simpleIsVisibleCheck": "true",
    }
def get_uri(host='localhost', port=4723):
    """Return the ``/wd/hub`` URL of the Appium server.

    Args:
        host: Host name or address of the server. Defaults to ``localhost``.
        port: TCP port of the server. Defaults to ``4723``.

    Returns:
        The URL string, e.g. ``http://localhost:4723/wd/hub``.
    """
    return 'http://%s:%s/wd/hub' % (host, port)
def setUpDriver():
    """Create a remote Appium driver and return it.

    The capabilities come from ``get_desired_capabilities()`` and are printed
    before connecting; the server address comes from ``get_uri()``.
    """
    capabilities = get_desired_capabilities()
    print(capabilities)
    server_url = get_uri()
    return webdriver.Remote(server_url, capabilities)
\ No newline at end of file
from selenium.webdriver.support.ui import WebDriverWait import cv2
from selenium.webdriver.support import expected_conditions as EC import uiautomator2 as u2
import unittest import numpy as np
import time
from selenium.common.exceptions import NoSuchElementException
from appium_common import setUpDriver
brand_xpath = "/hierarchy/android.widget.FrameLayout/android.widget.FrameLayout/android.widget.LinearLayout/" \ brand_dir = "brands"
"android.widget.FrameLayout/android.widget.LinearLayout/android.widget.FrameLayout/android.view.ViewGroup/" \ delim = "; "
"android.support.v4.view.ViewPager/android.support.v7.widget.RecyclerView/" \
"android.widget.RelativeLayout[%d]"
def scroll(drv): def cropImg(img, out, bounds):
t1 = time.time() cropped = img[int(bounds['top']):int(bounds['bottom']), int(bounds['left']):int(bounds['right'])] # 裁剪坐标为[y0:y1, x0:x1]
drv.swipe(0, 600, 0, 0, 1000) cv2.imwrite(out, cropped)
t2 = time.time()
print("scroll: %s" % (t2 - t1))
def getAllBrands(): def getBrandDetail(d, brand, done):
t1 = time.time() all_texts = brand.sibling(className="android.widget.TextView")
drv = setUpDriver() name = all_texts[0].get_text()
t2 = time.time() if name not in done:
print("set up driver: %s" % (t2 - t1)) res = []
done = [] for text in all_texts:
while True: st = text.get_text()
t1 = time.time() if st in ["进入品牌", ""]:
brand = drv.find_element_by_xpath(brand_xpath % 1)
t2 = time.time()
print("find brand: %s" % (t2 - t1))
try:
t1 = time.time()
brand_name = brand.find_element_by_id("org.c2h4.afei.beauty:id/tv_name")
t2 = time.time()
print("find name: %s" % (t2 - t1))
except NoSuchElementException:
brand_name = None
try:
t1 = time.time()
brand_name_en = brand.find_element_by_id("org.c2h4.afei.beauty:id/tv_name_en")
t2 = time.time()
print("find english name: %s" % (t2 - t1))
except NoSuchElementException:
brand_name_en = None
if brand_name is not None:
if brand_name.text not in done:
done.append(brand_name.text)
print(brand_name.text)
print(brand_name_en.text)
else:
scroll(drv)
continue continue
elif brand_name_en is not None: elif st.startswith("代表单品"):
if brand_name_en.text not in done:
done.append(brand_name_en.text)
print(brand_name_en.text)
else:
scroll(drv)
continue continue
else:
res.append(st)
if len(res) in [3, 4]:
if len(res) == 3:
res = [res[0]] + res
done.append(name)
return res
else: else:
scroll(drv) return None
continue
def getBrandImg(d, name):
    """Crop the image belonging to brand *name* from a screenshot and save it.

    The ``RelativeLayout`` rows under the ``RecyclerView`` are scanned from
    last to first for a row containing exactly one child whose text equals
    ``name``. The ``sriv`` view of the row immediately before it is taken as
    the image element (presumably the brand logo -- confirm against the app
    layout). Its ``visibleBounds`` are then cropped out of a full screenshot
    and written to ``<brand_dir>/<name>.jpg`` via ``cropImg``.

    Args:
        d: Device object; it is called as ``d(className=...)`` to build
            selectors and provides ``d.screenshot()``.
        name: Brand name text to search for; also used as the file stem.

    Returns:
        None. If no matching row is found, a message is printed and nothing
        is written.
    """
    rows = d(className="android.support.v7.widget.RecyclerView").child(
        className="android.widget.RelativeLayout")
    # Read the count once instead of on every iteration.
    row_count = rows.count
    imgele = None
    # Walk from the last row down to index 1: the image is looked up in the
    # row *before* the match, and index 0 has no preceding row (the original
    # would have indexed rows[-1] there).
    for row_idx in range(row_count - 1, 0, -1):
        if rows[row_idx].child(text=name).count == 1:
            imgele = rows[row_idx - 1].child(
                resourceId="org.c2h4.afei.beauty:id/sriv")
            break
    if imgele is None:
        print("Can't find image for %s" % name)
        # Nothing to crop; the original fell through and failed on
        # ``imgele.info`` with ``imgele`` still None.
        return
    image = d.screenshot()
    # Convert the screenshot to a BGR array before handing it to cropImg.
    image = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
    outpath = "%s/%s.jpg" % (brand_dir, name)
    cropImg(image, outpath, imgele.info['visibleBounds'])
t1 = time.time() def getAllBrands():
brand_year = brand.find_element_by_id("org.c2h4.afei.beauty:id/tv_year") d = u2.connect_usb('d52196830204')
t2 = time.time() # print(d.dump_hierarchy())
print("find year: %s" % (t2 - t1)) f = open(brand_dir+"/brands.csv", 'a+')
print(brand_year.text) done = []
nocnt = 0
while True:
brand = d(className="android.support.v7.widget.RecyclerView").child(resourceId="org.c2h4.afei.beauty:id/tv_name")[0]
res = getBrandDetail(d, brand, done)
if res is not None:
print(delim.join(res))
f.write(delim.join(res) + "\n")
getBrandImg(d, res[0])
nocnt = 0
else:
nocnt += 1
if nocnt == 5:
break
d.swipe_ext("up", scale=0.2)
brands = d(className="android.support.v7.widget.RecyclerView").child(resourceId="org.c2h4.afei.beauty:id/tv_name")
for idx in range(1, brands.count):
res = getBrandDetail(d, brands[idx], done)
if res is not None:
f.writelines(delim.join(res))
print(delim.join(res) + "\n")
getBrandImg(d, res[0])
t1 = time.time() # while True:
brand_desc = brand.find_element_by_id("org.c2h4.afei.beauty:id/tv_desc") # brand = d(className="android.support.v7.widget.RecyclerView").child(resourceId="org.c2h4.afei.beauty:id/tv_name_en")[0]
t2 = time.time() # res = getBrandDetail(d, brand, done)
print("find desc: %s" % (t2 - t1)) # if res is not None:
# print(delim.join(res))
# f.writelines(delim.join(res))
# d.swipe_ext("down", scale=0.2)
print(brand_desc.text)
scroll(drv)
if __name__=='__main__': if __name__=='__main__':
getAllBrands() getAllBrands()
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment