Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
C
crawler
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
backend
crawler
Commits
ee5a96d0
Commit
ee5a96d0
authored
3 years ago
by
李小芳
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
add
parent
fa09f776
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
3 additions
and
20 deletions
+3
-20
crawler_xinyang_ask_service_city.py
dev/xinyang_ask_tag/crawler_xinyang_ask_service_city.py
+3
-20
No files found.
dev/xinyang_ask_tag/crawler_xinyang_ask_service_city.py
View file @
ee5a96d0
...
...
@@ -291,20 +291,11 @@ class SoYongSpider(object):
def
run
(
self
,
city_tags
):
get_data_file
=
open
(
self
.
file_name
,
"a+"
,
encoding
=
"utf-8"
)
get_lasted_data
=
[]
self
.
city_list
=
[
city_tags
]
for
city_name
in
self
.
city_list
:
# 热门城市
city_id
=
self
.
cityIdMapping
[
city_name
]
crawler_xinyang
=
CrawlerMain
(
city_id
=
city_id
)
# print(city_name, self.city_list.index(city_name), len(self.city_list) - 1)
if
self
.
city_list
.
index
(
city_name
)
==
len
(
self
.
city_list
)
-
1
:
get_lasted_data
.
append
(
city_name
)
for
keyword
in
self
.
keywords
:
# 热门词
# print(keyword, self.keywords.index(keyword), len(self.keywords) - 1)
if
self
.
keywords
.
index
(
keyword
)
==
len
(
self
.
keywords
)
-
1
and
len
(
get_lasted_data
)
==
1
:
get_lasted_data
.
append
(
keyword
)
for
page
in
range
(
1
,
self
.
page_num
):
# 筛选前100个
word
=
str
(
keyword
+
city_name
)
if
word
not
in
self
.
have_get_service_info
.
keys
()
or
self
.
have_get_service_info
[
word
]
<
10
:
...
...
@@ -326,11 +317,7 @@ class SoYongSpider(object):
time
.
sleep
(
1
)
get_data_file
.
close
()
print
(
"get_lasted_data:"
,
get_lasted_data
)
if
len
(
get_lasted_data
)
==
2
:
return
True
else
:
return
False
def
main
(
city_tag
):
...
...
@@ -339,12 +326,8 @@ def main(city_tag):
today
=
str
(
nowday
)
.
split
()[
0
]
file_name
=
"save_data_"
+
str
(
today
)
+
city_tag
+
".txt"
while
(
True
):
spider_obj
=
SoYongSpider
(
file_name
)
flat
=
spider_obj
.
run
(
city_tags
=
city_tag
)
print
(
"flat:"
,
flat
)
if
flat
==
True
:
break
spider_obj
=
SoYongSpider
(
file_name
)
spider_obj
.
run
(
city_tags
=
city_tag
)
print
(
time
.
time
()
-
begin
)
print
(
"end"
)
...
...
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment