improvement

65af0d2a · 李康 · 48a6ab3c · 65af0d2a · 65af0d2a · 65af0d2a
Commit 65af0d2a authored Oct 22, 2019 by 李康
Showing with 201 additions and 30 deletions

.gitignore .gitignore +2 -1

brands.py brands.py +8 -1

brands.csv brands/brands.csv +12 -8

find_brand.py find_brand.py +96 -0

product.py product.py +56 -15

product_in_list.py product_in_list.py +27 -5

No files found.
--- a/.gitignore
+++ b/.gitignore
 __pycache__
 .idea
-.DS_Store
+.DS_Store
\ No newline at end of file
--- a/brands.py
+++ b/brands.py
@@ -53,7 +53,14 @@ def getAllBrands():
    done = []
    nocnt = 0
    while True:
-        brand = d(className="android.support.v7.widget.RecyclerView").child(resourceId="org.c2h4.afei.beauty:id/tv_name")[0]
+
+        brand = None
+        brands = d(className="android.support.v7.widget.RecyclerView").child(resourceId="org.c2h4.afei.beauty:id/tv_name")
+        for temp in brands:
+            if temp.info['bounds']['top'] == temp.info['visibleBounds']['top']:
+                brand = temp
+                break
+
        res = getBrandDetail(d, brand, done)
        if res is not None:
            print(delim.join(res))

--- a/brands/brands.csv
+++ b/brands/brands.csv
@@ -16,7 +16,8 @@
 雪花秀; SULWHASOO; 韩国 1997年成立; 韩国爱茉莉太平洋集团的高端护肤线，将人参应用于护肤领域
 香缇卡; 香缇卡; 法国 1997年成立; 主打天然草本成分，其中钻石系列最为知名
 馥蕾诗; FRESH; 法国 1991年成立; 于2003年推出古源修护面霜，打开贵妇市场
-香奈儿; Chanel; 法国 1910年成立; 顶级奢侈品牌，已推出多个护肤彩妆系列大宝; DaBao; 中国内地 1999年成立; 国民级护肤品牌，经典产品大宝SOD畅销至今
+香奈儿; Chanel; 法国 1910年成立; 顶级奢侈品牌，已推出多个护肤彩妆系列
+大宝; DaBao; 中国内地 1999年成立; 国民级护肤品牌，经典产品大宝SOD畅销至今
 旁氏; POND'S; 美国 1864年成立; 联合利华旗下护肤品牌，明星单品旁氏米粹是平价温和洁面的代表
 曼秀雷敦; MENTHOLATUM; 美国 1889年成立; 大众护肤品牌，旗下护唇系列最为知名
 启初; Giving; 中国内地 2013年成立; 上海家化旗下婴幼儿护理品牌，产品整体温和度较高
@@ -117,10 +118,9 @@
 至本; 至本; 中国内地 2012年成立; 新国货品牌，其“舒颜”系列专门针对屏障受损肌肤
 敏感话题; D Program; 日本 1997年成立; 资生堂旗下敏感肌品牌，针对干敏、油敏分别推出不同系列
 可复美; 可复美; 中国内地 1999年成立; 主打类人胶原蛋白面膜，适合医美术后与敏感期护理
-敷尔佳; 敷尔佳; 中国内地 1996年成立; 主打医用透明质酸钠面膜，适合医美术后与敏感期护理百雀羚; Pechoin; 中国内地 1931年成立; 始创于1931年，主打草本植物提取成分
-薇诺娜; WINONA; 中国内地 2008年成立; 专注敏感护理的国货药妆代表，产品经由国内多家医院的临床验证
-
-主打成分为青刺果、马齿苋提取物
+敷尔佳; 敷尔佳; 中国内地 1996年成立; 主打医用透明质酸钠面膜，适合医美术后与敏感期护理
+百雀羚; Pechoin; 中国内地 1931年成立; 始创于1931年，主打草本植物提取成分
+薇诺娜; WINONA; 中国内地 2008年成立; 专注敏感护理的国货药妆代表，产品经由国内多家医院的临床验证，主打成分为青刺果、马齿苋提取物
 大宝; DaBao; 中国内地 1999年成立; 国民级护肤品牌，经典产品大宝SOD蜜畅销至今
 自然堂; CHANDO; 中国内地 2001年成立; 伽蓝集团旗下的主力品牌，主打成分喜马拉雅冰川水
 相宜本草; 相宜本草; 中国内地 1999年成立; 与上海中医药大学联合研制，主打成分大红景天根提取物
@@ -143,7 +143,9 @@
 完美; PERFECT; 中国内地 1994年成立; 旗下最知名单品是完美芦荟胶，畅销几十年
 隆力奇; Longrich; 中国内地 1986年成立; 国产日化企业，早期以蛇业起家，产品主打蛇油成分
 林清轩; 林清轩; 中国内地 2003年成立; 走中高端线的国产品牌，旗下最知名单品是山茶花润肤油
-稚优泉; chioture; 中国内地 2009年成立; 年轻的化妆品品牌，彩妆和护肤系类都比较平价高夫; 高夫; 中国内地 1992年成立; 中国市场上第一个男士个人护理品牌资生堂; SHISEIDO; 日本 1872年成立; 亚洲第一的化妆品集团，旗下子品牌超过20个，拥有众多技术专利
+稚优泉; chioture; 中国内地 2009年成立; 年轻的化妆品品牌，彩妆和护肤系类都比较平价
+高夫; 高夫; 中国内地 1992年成立; 中国市场上第一个男士个人护理品牌
+资生堂; SHISEIDO; 日本 1872年成立; 亚洲第一的化妆品集团，旗下子品牌超过20个，拥有众多技术专利
 珂润; Curel; 日本 1999年成立; 日系舒敏代表品牌，主打独家神经酰胺成分
 城野医生; Dr.Ci:Labo; 日本 1999年成立; 由医学博士创立的著名药妆品牌，2018年被强生集团收购
 肌肤之钥; CLE DE PEAU; 日本 1997年成立; 资生堂旗下高端品牌，护肤线以保湿、抗衰为主，彩妆线更有众多口碑单品
@@ -170,7 +172,8 @@
 佑天兰; utena; 日本 1927年成立; 历史悠久的日系美容品牌，面膜品类最为知名
 蜜浓; MINON; 日本 1973年成立; 第一三共旗下品牌，主打氨基酸成分，滋润保湿、温和度高
 肌美精; Kracie; 日本 2007年成立; Kracie旗下品牌，以面膜品类为主打
-近江兄弟; MENTURM; 日本 1920年成立; 日本最大的防晒霜生产商，明星单品小熊防晒价格非常亲民后; Whoo; 韩国 2003年成立; 所属韩国LG集团，主打宫廷风，韩系高端护肤品代表
+近江兄弟; MENTURM; 日本 1920年成立; 日本最大的防晒霜生产商，明星单品小熊防晒价格非常亲民
+后; Whoo; 韩国 2003年成立; 所属韩国LG集团，主打宫廷风，韩系高端护肤品代表
 雪花秀; SULWHASOO; 韩国 1997年成立; 爱茉莉太平洋集团高端护肤线，将人参成分应用于护肤领域
 悦诗风吟; Innisfree; 韩国 2000年成立; 同属爱茉莉太平洋集团，产品品类齐全，专卖店众多
 爱和纯; AHC; 韩国 1999年成立; 韩系功效护肤品牌，以透明质酸、维生素B5为主打，2017年被联合利华收购
@@ -258,7 +261,8 @@
 欧树; NUXE; 法国 1957年成立; 主打“天然植物护肤”概念，旗下卸妆凝胶知名度较高
 朵梵; DARPHIN; 法国 1958年成立; 雅诗兰黛旗下的芳疗护肤品牌，由皮肤科医生创立
 蜜葳特; melvita; 法国 1983年成立; 从属于欧舒丹集团，明星单品为玫瑰水
-香缇卡; 香缇卡; 法国 1997年成立; 主打天然草本成分，其中钻石系列最为知名妮维雅; NIVEA; 德国 1911年成立; 德国拜尔斯道夫公司旗下的护肤品与身体护理品品牌
+香缇卡; 香缇卡; 法国 1997年成立; 主打天然草本成分，其中钻石系列最为知名
+妮维雅; NIVEA; 德国 1911年成立; 德国拜尔斯道夫公司旗下的护肤品与身体护理品品牌
 阿玛尼; Giorgio Armani; 意大利 1975年成立; 世界知名奢侈品牌，创立于意大利米兰，旗下化妆品和香水业务归属于欧莱雅集团
 怡思丁; ISDIN; 西班牙 1975年成立; 西班牙本土销量第一的药妆品牌，除了美容护肤，还有母婴护肤产品和美体系列产品
 英国AA网; AA Skincare; 英国 1984年成立; 英国品牌，主要出售天然芳香产品，比如精油等

--- a/find_brand.py
+++ b/find_brand.py
+import sys, os, time, re
+
+# Directory prefix of the scraped products.csv that findBrand() reads.
+list_dir = "./"
+# Directory prefix of brands.csv, the brand table findBrand() matches against.
+brand_dir = "brands/"
+def findBrand(argv):
+    """Match each scraped product to a known brand and write normalised CSVs.
+
+    Reads ``products.csv`` under ``list_dir`` and ``brands.csv`` under
+    ``brand_dir`` (both "; "-delimited).  For every product whose key (the
+    first non-empty value of its first two columns) starts with a brand name:
+
+    * the brand row is written to ``related_brands.csv`` with its third
+      column cut at the first space;
+    * a cleaned product row is written to ``new_product.csv``: score halved,
+      comment count reduced to the digits before "人", and the fifth column
+      split on "·" into a spec column and a price column (whitespace removed,
+      anything after "/" dropped, leading "¥" stripped).
+
+    Processing stops, after printing the offending value, as soon as a row
+    does not have the expected shape.
+
+    Args:
+        argv: Command-line arguments; currently unused.
+    """
+    # Context managers guarantee the outputs are flushed and closed even on
+    # the early-return paths below.  UTF-8 is stated explicitly because the
+    # data is Chinese text and the platform default encoding is not portable.
+    with open(list_dir + "products.csv", "r", encoding="utf-8") as listf, \
+            open(brand_dir + "brands.csv", "r", encoding="utf-8") as brandf, \
+            open("related_brands.csv", "w", encoding="utf-8") as obf, \
+            open("new_product.csv", "w", encoding="utf-8") as opf:
+
+        # Brand name -> full CSV line.  Dict order keeps the first position
+        # of a duplicated brand while the stored line is the last one seen,
+        # which matches a list-of-names plus dict-of-lines lookup.
+        brandlines = {}
+        for line in brandf:
+            brand = line.split("; ")[0]
+            if not brand.strip():
+                # A blank line would otherwise become a brand whose row has
+                # no columns (and an empty name prefix-matches everything).
+                continue
+            brandlines[brand] = line
+
+        # Keys without a matching brand; only consumed by the debug dump
+        # that follows this loop.
+        not_found = []
+        for line in listf:
+            fields = line.split("; ")
+            if fields[0] != "":
+                key = fields[0]
+            elif len(fields) > 1 and fields[1] != "":
+                key = fields[1]
+            else:
+                print("No product key !!!!!!!!")
+                return
+
+            # First brand (in file order) that prefixes the product key.
+            brand = next((b for b in brandlines if key.startswith(b)), None)
+            if brand is None:
+                not_found.append(key)
+                continue
+
+            # print("%s-------->%s" % (key, brand))
+            newbls = brandlines[brand].split("; ")
+            if len(newbls) >= 3:
+                # Keep only the text before the first space of column three.
+                newbls[2] = newbls[2].split(" ")[0]
+            if len(newbls) != 4:
+                print(newbls)
+                return
+            obf.write("; ".join(newbls))
+
+            newline = fields[:2]
+            newline.append(str(float(fields[2]) / 2))
+
+            # Digits that appear before the first "人"; isdecimal() accepts
+            # exactly the characters int() would parse.
+            count_text = fields[3].partition("人")[0]
+            newline.append("".join(ch for ch in count_text if ch.isdecimal()))
+
+            spec = ""
+            price = ""
+            if fields[4] != "":
+                parts = fields[4].split("·")
+                spec = re.sub(r'\s+', '', parts[0])
+                if len(parts) == 2:
+                    price = re.sub(r'\s+', '', parts[1]).split('/')[0]
+                    # startswith() also covers an empty price, which would
+                    # raise IndexError with a price[0] lookup.
+                    if not price.startswith("¥"):
+                        print(price)
+                        return
+                    price = price[1:]
+            newline += [spec, price]
+
+            # The last field still carries the line's trailing newline, so
+            # no explicit line terminator is written.
+            newline += fields[5:]
+            if len(newline) != 8:
+                print(newline)
+                return
+
+            opf.write("; ".join(newline))
+
+
+    # print("===================totally %s product not found=========================" % len(not_found))
+    # for nf in not_found:
+    #     print(nf)
+
+
+# Script entry point: run the brand matching with the raw command-line arguments.
+if __name__=='__main__':
+    findBrand(sys.argv)
\ No newline at end of file
--- a/product.py
+++ b/product.py
@@ -8,6 +8,11 @@ filter_scroll_steps = 2

 def clickBrand(d, brand_name):
    print("Try finding brand %s......" % brand_name)
+    if gotoProductFilter(d, 10) is None:
+        d(scrollable=True).scroll.vert.toBeginning()
+    else:
+        d.press("back")
+
    while True:
        names = d(className="android.support.v7.widget.RecyclerView").child(resourceId="org.c2h4.afei.beauty:id/tv_name")
        for name in names:
@@ -40,10 +45,14 @@ def adjustProductFilterPosition(d):
            break
    return filter

-def gotoProductFilter(d):
+def gotoProductFilter(d, time_cnt=None):
+    cnt = 0
    while d(resourceId="org.c2h4.afei.beauty:id/tv_filter").count == 0:
        d.swipe_ext("up", scale=0.5)
        time.sleep(1)
+        cnt += 1
+        if time_cnt is not None and cnt > time_cnt:
+            return
    return adjustProductFilterPosition(d)

 def getAllFilterTypes(d):
@@ -77,9 +86,14 @@ def getAllFilterTypes(d):
 def filterProduct(d, ftype_name):
    print("Select the filter type %s" % ftype_name)
    ftype = d(resourceId="org.c2h4.afei.beauty:id/tv_type", text=ftype_name)
+    cnt = 0
    while ftype.count == 0:
-        d(scrollable=True).scroll(steps=filter_scroll_steps)
+        if cnt < 10:
+            d(scrollable=True).scroll.vert(steps=filter_scroll_steps)
+        else:
+            d(scrollable=True).scroll.vert.backward(steps=filter_scroll_steps)
        ftype = d(resourceId="org.c2h4.afei.beauty:id/tv_type", text=ftype_name)
+        cnt += 1
    ftype.click()


@@ -243,13 +257,14 @@ def resetRuntime(d, brand_name, ftype_name, done):
 def getAllProductUiObj(d):
    return d(resourceId="org.c2h4.afei.beauty:id/rv_container").child(className="android.widget.RelativeLayout")

-def getAllProducts(d, f, brand_name, product_type):
+def getAllProducts(d, f, brand_name, product_type, starting_product_name = None):
    done = []
    nocnt = 0
    scraped_cnt = 0
    scroll_scale = 0.1

-    debug = False
+    if starting_product_name is not None:
+        gotoSpecificProduct(d, starting_product_name)
    while True:
        products = getAllProductUiObj(d)
        product = None
@@ -258,13 +273,9 @@ def getAllProducts(d, f, brand_name, product_type):
                product = temp
                break
        try:
-            # switch to open reset runtime debug
-            # if scraped_cnt == 5 and debug is not None:
-            #     debug = True
-            res = getProductDetail(d, brand_name, product, done, debug)
+            res = getProductDetail(d, brand_name, product, done)
        except RuntimeError:
            resetRuntime(d, brand_name, product_type, done)
-            debug = None
            nocnt = 0
            continue
        if res is not None:
@@ -281,22 +292,48 @@ def getAllProducts(d, f, brand_name, product_type):

    # handle the last few products in the list
    products = getAllProductUiObj(d)
-    for idx in range(1, products.count):
-        res = getProductDetail(d, brand_name, products[idx], done)
+    pidx = 1
+    while pidx < products.count:
+        try:
+            res = getProductDetail(d, brand_name, products[pidx], done)
+            pidx += 1
+        except RuntimeError:
+            resetRuntime(d, brand_name, product_type, done)
        if res is not None:
            saveProduct(f, res, product_type)
            scraped_cnt += 1

    return scraped_cnt

+def gotoSpecificProduct(d, productName):
+    """Swipe the product list upwards until ``productName`` is on screen.
+
+    After each swipe the currently listed product UI objects are re-read and
+    their keys (first element of ``getProductKey``) compared with
+    ``productName``.  Returns ``None`` as soon as one matches.
+
+    NOTE: there is no upper bound on the number of swipes, so this does not
+    return if the product never shows up.
+    """
+    print("Goto product of %s......" % productName)
+    while True:
+        on_screen = getAllProductUiObj(d)
+        # any() stops at the first matching entry, like an early return
+        # from an explicit for-loop.
+        if any(getProductKey(entry)[0] == productName for entry in on_screen):
+            return
+        d.swipe_ext("up", scale=0.4)
+
 def getProductByType(argv):
    brand_name = argv[1]
    if not os.path.exists(brand_name):
        os.makedirs(brand_name)

    starting_brand_name = None
+    starting_product_name = None
    if len(argv) == 3:
        starting_brand_name = argv[2]
+        with open(brand_name + "/products.csv", 'r') as f:
+            temp = None
+            for line in f.readlines():
+                temp = line
+            last = temp.split("; ")
+            if last[0] != "":
+                starting_product_name = last[0]
+            else:
+                starting_product_name = last[1]

    d = u2.connect_usb('d52196830204')
    print(d.app_current())
@@ -333,15 +370,19 @@ def getProductByType(argv):
            #     continue
            filterProduct(d, ftype_name)
            time.sleep(1)
-            temp_cnt = getAllProducts(d, f, brand_name, ftype_name)
+            if ftype_names.index(ftype_name) == 0 and starting_product_name is not None:
+                temp_cnt = getAllProducts(d, f, brand_name, ftype_name, starting_product_name)
+            else:
+                temp_cnt = getAllProducts(d, f, brand_name, ftype_name)
            f.flush()
            print("--------------------- Brand Sub-summury ---------------------")
            print("%s %s products are scraped from %s" % (temp_cnt, ftype_name, brand_name))
            print("-------------------------------------------------------------")
            scraped_cnt += temp_cnt
-            # start select another type
-            filter = getProductFilterUiObj(d)
-            filter.click()
+            if ftype_names.index(ftype_name) != len(ftype_names) - 1:
+                # start select another type
+                filter = getProductFilterUiObj(d)
+                filter.click()
    print("--------------------- Brand Summury ---------------------")
    print("%s products are scraped from %s" % (scraped_cnt, brand_name))
    print("---------------------------------------------------------")

--- a/product_in_list.py
+++ b/product_in_list.py
@@ -5,7 +5,7 @@ import sys, os, time

 delim = "; "
 filter_scroll_steps = 2
-output_dir = "list"
+output_dir = "list2"

 def cropImg(img, out, bounds):
    delta = 20 + 28
@@ -157,8 +157,8 @@ def getAllProducts(d, f, product_type):

    return scraped_cnt

-tap_list = ["洁面", "护肤水", "精华", "乳液·面霜", "防晒", "眼部", "清洁面膜", "面膜", "唇部", "身体"]
-# tap_list = ["卸妆", "妆前", "粉底", "口红", "眉部", "眼影", "眼线", "睫毛", "腮红", "定妆", "遮瑕·修容", "香水"]
+# tap_list = ["洁面", "护肤水", "精华", "乳液·面霜", "防晒", "眼部", "清洁面膜", "面膜", "唇部", "身体"]
+tap_list = ["卸妆", "妆前", "粉底", "口红", "眉部", "眼影", "眼线", "睫毛", "腮红", "定妆", "遮瑕·修容", "香水"]
 def getProductByType(argv):
    target_idx = int(argv[1])
    d = u2.connect_usb('d52196830204')
@@ -181,5 +181,27 @@ def getProductByType(argv):
        print("-------------------------------------------------------------")
    d.freeze_rotation(False)

+def getProductByType2(argv):
+    """Scrape every category in ``tap_list`` in a single run.
+
+    Connects to the device, freezes screen rotation, then for each entry of
+    ``tap_list`` clicks the UI element with that text, scrapes its products
+    with ``getAllProducts`` and appends them to ``<output_dir>/products.csv``.
+    Stops at the first category whose text does not match exactly one UI
+    element.
+
+    Args:
+        argv: Command-line arguments; currently unused.
+    """
+    d = u2.connect_usb('d52196830204')
+    print(d.app_current())
+    d.freeze_rotation()
+
+    # try/finally: rotation must be unfrozen on the early return below and
+    # on any scraping error, not only after a complete run.
+    try:
+        with open("%s/products.csv" % output_dir, 'a+') as f:
+            for tap in tap_list:
+                tap_ui_obj = d(text=tap)
+                if tap_ui_obj.count != 1:
+                    print("Tap %s (%d) is not found!" % (tap, tap_ui_obj.count))
+                    return
+                tap_ui_obj.click()
+                time.sleep(1)
+                temp_cnt = getAllProducts(d, f, tap)
+                # Flush per category so finished work survives a later crash.
+                f.flush()
+                print("--------------------- tap summury ---------------------")
+                print("%s products are scraped from %s" % (temp_cnt, tap))
+                print("-------------------------------------------------------------")
+    finally:
+        d.freeze_rotation(False)
+
 if __name__=='__main__':
-    getProductByType(sys.argv)
\ No newline at end of file
+    getProductByType(sys.argv)
+    # getProductByType2(sys.argv)
\ No newline at end of file