Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
C
crawler
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
backend
crawler
Commits
47a96c79
Commit
47a96c79
authored
3 years ago
by
李小芳
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
add
parent
19399a66
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
17 additions
and
13 deletions
+17
-13
app_soyoung_by_cityname.py
dev/xinyang_ask_tag/app_soyoung_by_cityname.py
+12
-9
app_soyoung_v1.py
dev/xinyang_ask_tag/app_soyoung_v1.py
+5
-4
city.py
dev/xinyang_ask_tag/city.py
+0
-0
No files found.
dev/xinyang_ask_tag/app_soyoung_by_cityname.py
View file @
47a96c79
...
@@ -58,7 +58,10 @@ def send_email_tome():
...
@@ -58,7 +58,10 @@ def send_email_tome():
def
get_service_info
(
city_id
=-
1
,
keyword
=
""
,
city_name
=
""
,
all_skuids
=
[],
get_data_file
=
None
):
def
get_service_info
(
city_id
=-
1
,
keyword
=
""
,
city_name
=
""
,
all_skuids
=
[],
get_data_file
=
None
):
print
(
"get_service_info"
)
print
(
"get_service_info"
)
url
=
'https://api.soyoung.com/v8/superList/index'
url
=
'https://api.soyoung.com/v8/superList/index'
break_flat
=
False
other_city_count
=
0
for
page
in
range
(
1
,
500
):
for
page
in
range
(
1
,
500
):
if
break_flat
==
False
and
other_city_count
<
100
:
data
=
{
'_time'
:
'1626769752'
,
data
=
{
'_time'
:
'1626769752'
,
'ab_id'
:
'C521C79519A5D544390E60FEA08B32DB'
,
'ab_id'
:
'C521C79519A5D544390E60FEA08B32DB'
,
"app_id"
:
42
,
"app_id"
:
42
,
...
@@ -98,9 +101,8 @@ def get_service_info(city_id=-1, keyword="", city_name="", all_skuids=[], get_da
...
@@ -98,9 +101,8 @@ def get_service_info(city_id=-1, keyword="", city_name="", all_skuids=[], get_da
}
}
s
=
random
.
random
()
s
=
random
.
random
()
time
.
sleep
(
s
)
time
.
sleep
(
s
)
count_qita
=
0
response_res
=
requests
.
post
(
url
,
data
,
verify
=
False
)
response_res
=
requests
.
post
(
url
,
data
,
verify
=
False
)
if
response_res
.
status_code
==
200
and
response_res
.
text
and
count_qita
<=
10
:
if
response_res
.
status_code
==
200
and
response_res
.
text
:
response
=
json
.
loads
(
response_res
.
text
)
response
=
json
.
loads
(
response_res
.
text
)
responseData
=
response
.
get
(
"responseData"
,
{})
.
get
(
"data"
)
responseData
=
response
.
get
(
"responseData"
,
{})
.
get
(
"data"
)
for
item
in
responseData
:
for
item
in
responseData
:
...
@@ -135,21 +137,22 @@ def get_service_info(city_id=-1, keyword="", city_name="", all_skuids=[], get_da
...
@@ -135,21 +137,22 @@ def get_service_info(city_id=-1, keyword="", city_name="", all_skuids=[], get_da
service_info
[
'query词'
]
=
keyword
service_info
[
'query词'
]
=
keyword
service_info
[
'城市'
]
=
city_name
service_info
[
'城市'
]
=
city_name
service_info
[
'平台'
]
=
"新氧"
service_info
[
'平台'
]
=
"新氧"
service_info
[
'链接'
]
=
"https://y.soyoung.com/cp{}"
.
format
(
service_info
[
'skuid'
])
service_info
[
'链接'
]
=
"https://y.soyoung.com/cp{}"
.
format
(
service_info
[
'skuid'
])
print
(
service_info
)
print
(
service_info
)
if
service_data
.
get
(
"pid"
)
not
in
all_skuids
:
if
service_data
.
get
(
"pid"
)
not
in
all_skuids
:
get_data_file
.
write
(
json
.
dumps
(
service_info
))
get_data_file
.
write
(
json
.
dumps
(
service_info
))
get_data_file
.
write
(
"
\n
"
)
get_data_file
.
write
(
"
\n
"
)
else
:
else
:
count_qita
+=
1
other_city_count
+=
1
else
:
else
:
print
(
"break"
)
print
(
"break"
)
break
break_flat
=
True
elif
count_qita
>
10
:
print
(
city_id
,
keyword
,
"本地已爬完"
)
break
break
else
:
else
:
print
(
city_id
,
keyword
,
"爬取失败"
)
print
(
city_id
,
keyword
,
"爬取失败"
)
else
:
print
(
page
,
city_id
,
keyword
,
"本地已爬完"
)
def
main
(
city_tag
=
""
):
def
main
(
city_tag
=
""
):
...
@@ -162,7 +165,7 @@ def main(city_tag=""):
...
@@ -162,7 +165,7 @@ def main(city_tag=""):
cityIdMapping
=
{
'北京'
:
'1'
,
'上海'
:
'9'
,
'重庆'
:
'22'
,
'广州市'
:
'289'
,
'深圳市'
:
'291'
,
'郑州市'
:
'240'
,
'武汉市'
:
'258'
,
cityIdMapping
=
{
'北京'
:
'1'
,
'上海'
:
'9'
,
'重庆'
:
'22'
,
'广州市'
:
'289'
,
'深圳市'
:
'291'
,
'郑州市'
:
'240'
,
'武汉市'
:
'258'
,
'长沙市'
:
'275'
,
'南京市'
:
'162'
,
'成都市'
:
'385'
,
'西安市'
:
'438'
,
'杭州市'
:
'175'
}
'长沙市'
:
'275'
,
'南京市'
:
'162'
,
'成都市'
:
'385'
,
'西安市'
:
'438'
,
'杭州市'
:
'175'
}
\
# keywords = ['瘦脸针', '双眼皮', '光子嫩肤', '吸脂', '水光针', '玻尿酸', '热玛吉', '脱毛', '瘦腿针', '鼻综合', '瘦肩针', '下颌角', '线雕', '超声刀',
# keywords = ['瘦脸针', '双眼皮', '光子嫩肤', '吸脂', '水光针', '玻尿酸', '热玛吉', '脱毛', '瘦腿针', '鼻综合', '瘦肩针', '下颌角', '线雕', '超声刀',
# '美白针',
# '美白针',
# '眼综合', '隆鼻', '菲洛嘉', '下巴', '热拉提', '点阵激光', '面部吸脂', '开内眼角', '嗨体', '牙齿矫正', '皮秒', '超皮秒', '植发',
# '眼综合', '隆鼻', '菲洛嘉', '下巴', '热拉提', '点阵激光', '面部吸脂', '开内眼角', '嗨体', '牙齿矫正', '皮秒', '超皮秒', '植发',
...
@@ -175,7 +178,7 @@ def main(city_tag=""):
...
@@ -175,7 +178,7 @@ def main(city_tag=""):
# '艾莉薇', '抽脂', '瘦腿', '玻尿酸丰下巴', '纹眉', '伊肤泉微针', '美白', '厚唇改薄', '面部线雕', '祛疤', '伊婉V', '超皮秒祛斑', '除皱针',
# '艾莉薇', '抽脂', '瘦腿', '玻尿酸丰下巴', '纹眉', '伊肤泉微针', '美白', '厚唇改薄', '面部线雕', '祛疤', '伊婉V', '超皮秒祛斑', '除皱针',
# '开眼角',
# '开眼角',
# '海菲秀', '假体下巴', '刷酸', '泪沟', '拉皮', '全身吸脂', '缩鼻翼']
# '海菲秀', '假体下巴', '刷酸', '泪沟', '拉皮', '全身吸脂', '缩鼻翼']
keywords
=
[
'欧洲之星fotona4d'
]
keywords
=
[
'欧洲之星fotona4d'
]
# city_list = ["北京", "上海", "广州市", "深圳市", "杭州市", "成都市", "重庆", "南京市", "武汉市", "长沙市", "郑州市", "西安市"]
# city_list = ["北京", "上海", "广州市", "深圳市", "杭州市", "成都市", "重庆", "南京市", "武汉市", "长沙市", "郑州市", "西安市"]
...
...
This diff is collapsed.
Click to expand it.
dev/xinyang_ask_tag/app_soyoung_v1.py
View file @
47a96c79
...
@@ -190,11 +190,12 @@ if __name__ == '__main__':
...
@@ -190,11 +190,12 @@ if __name__ == '__main__':
# '埋线双眼皮', '菲洛嘉水光针', '双眼皮修复', '欧洲之星', '脂肪填充',
# '埋线双眼皮', '菲洛嘉水光针', '双眼皮修复', '欧洲之星', '脂肪填充',
# '溶脂针', '法令纹', '鼻基底','全切双眼皮', '颧骨内推',
# '溶脂针', '法令纹', '鼻基底','全切双眼皮', '颧骨内推',
# '鼻子', '抽脂', '光子嫩肤m22', '下颌缘提升', 'm22',
# '鼻子', '抽脂', '光子嫩肤m22', '下颌缘提升', 'm22',
# '鼻翼缩小', '欧洲之星fotona4d', '自体脂肪全面部填充', '玻尿酸丰唇', '除皱针',
# '鼻翼缩小', 'fotona4d欧洲之星', '自体脂肪全面部填充', '玻尿酸丰唇', '除皱针',
# '水光', '嗨体祛颈纹',
# '水光', '嗨体祛颈纹','假体隆胸', '英诺小棕瓶', '黄金微雕',
# '眼袋', '乔雅登',
keywords
=
[
'假体隆胸'
,
'英诺小棕瓶'
,
'黄金微雕'
,
keywords
=
[
'眼袋'
,
'乔雅登'
,
'除皱'
,
'颧骨'
,
'艾莉薇'
,
'除皱'
,
'颧骨'
,
'艾莉薇'
,
'瘦腿'
,
'玻尿酸丰下巴'
,
'纹眉'
,
'伊肤泉微针'
,
'美白'
,
'瘦腿'
,
'玻尿酸丰下巴'
,
'纹眉'
,
'伊肤泉微针'
,
'美白'
,
'厚唇改薄'
,
'面部线雕'
,
'祛疤'
,
'伊婉V'
,
'超皮秒祛斑'
,
'厚唇改薄'
,
'面部线雕'
,
'祛疤'
,
'伊婉V'
,
'超皮秒祛斑'
,
'开眼角'
,
'海菲秀'
,
'假体下巴'
,
'刷酸'
,
'泪沟'
,
'拉皮'
,
'全身吸脂'
,
'缩鼻翼'
]
'开眼角'
,
'海菲秀'
,
'假体下巴'
,
'刷酸'
,
'泪沟'
,
'拉皮'
,
'全身吸脂'
,
'缩鼻翼'
]
...
...
This diff is collapsed.
Click to expand it.
dev/xinyang_ask_tag/city.py
View file @
47a96c79
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment