1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
# -*- coding:UTF-8 -*-
# @Time : 2020/10/26 16:10
# @File : api_crawler_test.py
# @email : litao@igengmei.com
# @author : litao
import requests
from crawler.crawler_sys.utils.output_results import retry_get_url
import json, random, urllib
import requests
import json
from colorama import init, Fore, Back
init(autoreset=True)
def test_search_diary(query=''):
    """Collect diary and tractate ids returned by the content-search API.

    Requests six result pages of the search endpoint (``offset`` 0, 100,
    ..., 500 with ``size=100``) and gathers the ``id`` of every returned
    item, de-duplicated in first-seen order.

    Args:
        query: Search keyword. It is percent-encoded before being placed
            in the URL so reserved characters cannot corrupt the query
            string.

    Returns:
        A ``(diary_id_list, tractate_id_list)`` tuple of lists: ids of
        items whose ``data_type`` is 0, and ids of items whose
        ``data_type`` is 19. Items of any other type are ignored.

    Raises:
        KeyError: If a response body lacks ``data``/``diaries``, or an
            item lacks ``data_type``/``id``.
        Any error raised by ``retry_get_url`` or by JSON decoding
        propagates unchanged.
    """
    # Local import: the file only does ``import urllib``, which does not
    # by itself guarantee that the ``urllib.parse`` submodule is loaded.
    from urllib.parse import quote

    # The last parameter was previously garbled to "¤t_city_id" (the HTML
    # entity "&curren" swallowed "&curren"); restored to "&current_city_id".
    url = (
        'https://backend.igengmei.com/api/search/v6/content?query=%s'
        '&tab_type=1&is_first=0&size=100&order_by=0&area_ref='
        '&max_price=100000&min_price=0&ai_tab_type=0&sort_type=0'
        '&app_name=com.wanmeizhensuo.zhensuo&version=7.35.0'
        '&platform=android&device_id=869412032478155&os_version=10'
        '&model=LYA-AL00&screen=1080x2265&lat=40.005167&lng=116.477603'
        '&channel=benzhan&manufacturer=HUAWEI'
        '&uuid=1277ae88-094c-4484-9357-1b3e4519521e'
        '&android_device_id=androidid_7255c3398845cdd5'
        '&current_city_id=beijing'
    ) % quote(query, safe='')

    # Dict keys act as insertion-ordered sets: O(1) duplicate checks while
    # keeping the order in which ids were first seen.
    diary_ids = {}
    tractate_ids = {}

    for index in range(0, 6):
        response = retry_get_url(url, params={'offset': index * 100})
        items = response.json()['data']['diaries']
        print("这是第%d次请求" % index)
        for item in items:
            data_type = item['data_type']
            if data_type == 19:
                tractate_ids.setdefault(item['id'], None)
            elif data_type == 0:
                diary_ids.setdefault(item['id'], None)

    diary_id_list = list(diary_ids)
    tractate_id_list = list(tractate_ids)
    print("================================================")
    print(diary_id_list, tractate_id_list)
    return (diary_id_list, tractate_id_list)
if __name__ == '__main__':
    # Script entry point: run the content search for every keyword below
    # and dump the collected ids to ``save.csv``.
    query_list = ["双眼皮",
                  "瘦脸针",
                  "脱毛",
                  "小气泡",
                  "发型",
                  "水光针",
                  "热玛吉",
                  "光子嫩肤",
                  "测脸型",
                  "吸脂",
                  "玻尿酸",
                  "鼻综合",
                  "脸型适合什么发型",
                  "果酸焕肤",
                  "双眼皮2388",
                  "植发",
                  "线雕",
                  "开内眼角",
                  "祛斑",
                  "美白针",
                  "搜索项目、商品、医生",
                  "测发型",
                  "隆鼻",
                  "菲洛嘉",
                  "瘦腿针",
                  "眼综合",
                  "祛痘",
                  "除皱",
                  "超声刀",
                  "如何根据脸型测发型",
                  "皮秒",
                  "隆胸",
                  "脸型",
                  "热拉提",
                  "超皮秒",
                  "手术瘦脸",
                  "皮秒激光",
                  "微针",
                  "改善肤质",
                  "丰胸(隆胸)",
                  "厚唇改薄",
                  "点阵激光",
                  "鼻头",
                  "韩式半永久纹眉",
                  "玻尿酸注射",
                  "下颌角",
                  "牙齿矫正",
                  "面部吸脂",
                  "光子嫩肤88",
                  "韩式半永久妆",
                  "毛发移植",
                  "热玛吉五代",
                  "自体脂肪填充面部",
                  "拍照测发型",
                  "医选",
                  "颧骨内推",
                  "牙齿美白",
                  "面部轮廓",
                  "瘦脸针限时680",
                  "鼻部综合",
                  "自体脂肪填充",
                  "武汉洪山月目医疗美容",
                  "吸脂瘦脸",
                  "黄金微针",
                  "嗨体",
                  ]

    # One row per successful query: [query, diary ids, tractate ids].
    res_list = []
    for query in query_list:
        print(query)
        try:
            diary_id_list, tractate_id_list = test_search_diary(query=query)
        except Exception as exc:
            # Best effort: a failing query is skipped, but it is reported
            # rather than silently dropped. Catching Exception (not a bare
            # except) keeps Ctrl-C / SystemExit able to stop the run.
            print("query %r failed: %r" % (query, exc))
            continue
        res_list.append([query, diary_id_list, tractate_id_list])

    # pandas is only needed for the final dump, so it is imported here.
    import pandas as pd
    res = pd.DataFrame(res_list)
    # gb18030 is the encoding the script has always written; kept as-is.
    res.to_csv("save.csv", encoding='gb18030')