Commit b92b2705 authored by 李康

add brand script and the scraped data

parent 283e2486
__pycache__
.idea
.DS_Store
from appium import webdriver
def get_desired_capabilities(device_name="Redmi Note 4X", udid="d52196830204"):
    """Build the Appium desired-capabilities dict for the target Android device.

    Args:
        device_name: Value sent as the ``deviceName`` capability.
        udid: Device serial sent as the ``udid`` capability.

    Returns:
        A freshly built dict on every call, so callers may modify it without
        affecting later sessions.
    """
    return {
        "platformName": "Android",
        "deviceName": device_name,
        "udid": udid,
        "automationName": "uiautomator2",
        "appActivity": ".ui.LauncherUI",
        # The flag values below are deliberately kept as the exact strings
        # the script has always sent to the server.
        "noReset": "True",
        "waitForQuiescence": "false",
        "useJSONSource": "true",
        "simpleIsVisibleCheck": "true",
    }
def get_uri(host='localhost', port=4723):
    """Return the WebDriver endpoint URL of the Appium server.

    Args:
        host: Host name or address the server listens on.
        port: TCP port the server listens on.

    Returns:
        The ``http://<host>:<port>/wd/hub`` URL; with the defaults this is
        ``http://localhost:4723/wd/hub``.
    """
    return 'http://%s:%s/wd/hub' % (host, port)
def setUpDriver():
    """Open a remote WebDriver session and return the driver.

    The capabilities come from ``get_desired_capabilities()`` and the server
    address from ``get_uri()``; the capabilities are printed before the
    connection is attempted.
    """
    capabilities = get_desired_capabilities()
    # Echo the capabilities so the console shows which device is targeted.
    print(capabilities)
    return webdriver.Remote(get_uri(), capabilities)
\ No newline at end of file
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import unittest
import time
from selenium.common.exceptions import NoSuchElementException
from appium_common import setUpDriver
import cv2
import uiautomator2 as u2
import numpy as np
# XPath template for one RelativeLayout row inside the RecyclerView; the
# trailing %d is filled with the row index (used as `brand_xpath % 1`).
brand_xpath = "/hierarchy/android.widget.FrameLayout/android.widget.FrameLayout/android.widget.LinearLayout/" \
"android.widget.FrameLayout/android.widget.LinearLayout/android.widget.FrameLayout/android.view.ViewGroup/" \
"android.support.v4.view.ViewPager/android.support.v7.widget.RecyclerView/" \
"android.widget.RelativeLayout[%d]"
# Directory that receives brands.csv and the per-brand .jpg crops.
brand_dir = "brands"
# Separator placed between the fields of one brand row.
delim = "; "
def scroll(drv):
    """Perform one swipe gesture on *drv* and print how long the call took.

    Args:
        drv: Driver object exposing ``swipe``; it is called with the five
            positional values ``0, 600, 0, 0, 1000``.
    """
    started = time.time()
    # Drag from (0, 600) to (0, 0); the last value is presumably the gesture
    # duration in milliseconds -- confirm against the driver's swipe() API.
    drv.swipe(0, 600, 0, 0, 1000)
    print("scroll: %s" % (time.time() - started))
def cropImg(img, out, bounds):
    """Cut the rectangle described by *bounds* out of *img* and save it.

    Args:
        img: Image array indexed as ``[row, column]``.
        out: Path of the file handed to ``cv2.imwrite``.
        bounds: Mapping with ``top``, ``bottom``, ``left`` and ``right``
            entries; each is converted with ``int()`` before slicing.
    """
    top, bottom = int(bounds['top']), int(bounds['bottom'])
    left, right = int(bounds['left']), int(bounds['right'])
    # Slice order is [y0:y1, x0:x1]: rows first, then columns.
    region = img[top:bottom, left:right]
    cv2.imwrite(out, region)
# Appium-driver variant of the brand scraper: opens a session through
# setUpDriver() and then loops forever over the first brand row on screen,
# printing timing information for every lookup.
# NOTE(review): indentation was lost in this listing, and a second
# `def getAllBrands()` appears further down the file; if both live in one
# module the later definition replaces this one -- confirm which is intended.
# NOTE(review): the statements that look like the continuation of this loop
# (brand_name_en / tv_year / tv_desc handling) show up interleaved inside
# later functions, presumably an artifact of a diff view -- verify against
# the repository before editing the logic.
def getAllBrands():
t1 = time.time()
drv = setUpDriver()
t2 = time.time()
print("set up driver: %s" % (t2 - t1))
# Brand names that have already been printed.
done = []
while True:
t1 = time.time()
# Always inspect row index 1 of the list.
brand = drv.find_element_by_xpath(brand_xpath % 1)
t2 = time.time()
print("find brand: %s" % (t2 - t1))
# The name label may be absent from the row; fall back to None.
try:
t1 = time.time()
brand_name = brand.find_element_by_id("org.c2h4.afei.beauty:id/tv_name")
t2 = time.time()
print("find name: %s" % (t2 - t1))
except NoSuchElementException:
brand_name = None
# Same lookup for the English-name label.
try:
t1 = time.time()
brand_name_en = brand.find_element_by_id("org.c2h4.afei.beauty:id/tv_name_en")
t2 = time.time()
print("find english name: %s" % (t2 - t1))
except NoSuchElementException:
brand_name_en = None
if brand_name is not None:
if brand_name.text not in done:
done.append(brand_name.text)
print(brand_name.text)
# NOTE(review): brand_name_en can be None here (set in the except above);
# confirm whether this print really belongs to this branch.
print(brand_name_en.text)
else:
# Name already seen: scroll the list instead of printing it again.
scroll(drv)
def getBrandDetail(d, brand, done):
    """Collect the text fields of one brand card, at most once per brand.

    Args:
        d: Device handle; not used here, kept so existing callers keep working.
        brand: UI object of the card's name label. It must provide
            ``sibling(className=...)`` returning an indexable, iterable
            collection whose items provide ``get_text()``.
        done: List of brand names already collected. The card's name is
            appended to it when a row is returned.

    Returns:
        A list of four strings describing the brand, or ``None`` when the
        brand is already in *done* or the card does not yield three or four
        usable text fields.
    """
    all_texts = brand.sibling(className="android.widget.TextView")
    name = all_texts[0].get_text()
    if name in done:
        return None

    res = []
    for text in all_texts:
        st = text.get_text()
        # Skip the "enter brand" button label, empty labels and the
        # "representative products" caption; they are not brand data.
        if st in ["进入品牌", ""] or st.startswith("代表单品"):
            continue
        res.append(st)

    if len(res) not in (3, 4):
        return None
    if len(res) == 3:
        # Repeat the first field so every returned row has four columns
        # (presumably the card has no separate English name -- confirm).
        res = [res[0]] + res
    done.append(name)
    return res
def getBrandImg(d, name):
    """Save a cropped screenshot of the image shown for brand *name*.

    The rows of the RecyclerView are searched from the last one upwards for
    a row containing a label with text *name*; the image element is taken
    from the row just before it. The crop is written to
    ``<brand_dir>/<name>.jpg``.

    Args:
        d: Device handle; called with selector keywords and used for
            ``screenshot()``.
        name: Brand name to look for, also used as the output file stem.

    Returns:
        None. When no matching row is found a message is printed and no
        screenshot is taken.
    """
    elems = d(className="android.support.v7.widget.RecyclerView").child(className="android.widget.RelativeLayout")
    imgele = None
    # Stop before index 0: the image comes from the preceding row, and a
    # match at index 0 has none (elemIdx - 1 would be -1).
    for elemIdx in range(elems.count - 1, 0, -1):
        if elems[elemIdx].child(text=name).count == 1:
            imgele = elems[elemIdx - 1].child(resourceId="org.c2h4.afei.beauty:id/sriv")
            break
    if imgele is None:
        print("Can't find image for %s" % name)
        # Nothing to crop; continuing would dereference None below.
        return
    image = d.screenshot()
    # cv2 expects BGR channel order; the screenshot is converted from RGB.
    image = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
    outpath = "%s/%s.jpg" % (brand_dir, name)
    cropImg(image, outpath, imgele.info['visibleBounds'])
def getAllBrands(serial='d52196830204'):
    """Scrape every brand card from the on-screen list.

    Each new brand is printed, appended as one ``delim``-joined line to
    ``<brand_dir>/brands.csv`` and its image is saved through
    ``getBrandImg``. The list is swiped upward until five consecutive
    attempts yield no new brand; the cards still visible at that point are
    then processed as well.

    Args:
        serial: USB serial of the device to connect to.
    """
    d = u2.connect_usb(serial)

    def name_labels():
        # Re-query on every use: the visible cards change after each swipe.
        return d(className="android.support.v7.widget.RecyclerView").child(
            resourceId="org.c2h4.afei.beauty:id/tv_name")

    done = []
    # The context manager closes (and flushes) the CSV even if scraping
    # fails. UTF-8 is set explicitly because the rows contain Chinese text.
    with open(brand_dir + "/brands.csv", 'a+', encoding="utf-8") as f:

        def save(res):
            # One brand per line, in both the console and the file.
            line = delim.join(res)
            print(line)
            f.write(line + "\n")
            getBrandImg(d, res[0])

        nocnt = 0  # consecutive attempts that produced no new brand
        while True:
            res = getBrandDetail(d, name_labels()[0], done)
            if res is not None:
                save(res)
                nocnt = 0
            else:
                nocnt += 1
                if nocnt == 5:
                    break
            # NOTE(review): nesting was reconstructed from an unindented
            # listing; the swipe is assumed to run on every iteration.
            d.swipe_ext("up", scale=0.2)

        # The first visible card was handled by the loop above; pick up the
        # remaining cards that are on screen once the loop has stopped.
        brands = name_labels()
        for idx in range(1, brands.count):
            res = getBrandDetail(d, brands[idx], done)
            if res is not None:
                save(res)
# Start scraping only when this file is executed as a script.
if __name__ == '__main__':
    getAllBrands()
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment