Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
C
crawler
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
backend
crawler
Commits
b2246afd
Commit
b2246afd
authored
4 years ago
by
litaolemo
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
update
parent
94a03ced
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
4 additions
and
5 deletions
+4
-5
crawler_douban.py
crawler_sys/site_crawler_by_redis/crawler_douban.py
+4
-5
No files found.
crawler_sys/site_crawler_by_redis/crawler_douban.py
View file @
b2246afd
...
@@ -109,8 +109,8 @@ class CrawlerDouban():
...
@@ -109,8 +109,8 @@ class CrawlerDouban():
sign
=
"bf7dddc7c9cfe6f7"
sign
=
"bf7dddc7c9cfe6f7"
url_limit
=
url
.
split
(
"?"
)[
0
]
.
replace
(
"http://frodo.douban.com"
,
''
)
url_limit
=
url
.
split
(
"?"
)[
0
]
.
replace
(
"http://frodo.douban.com"
,
''
)
url_limit
=
urllib
.
parse
.
quote
(
url_limit
,
safe
=
''
)
url_limit
=
urllib
.
parse
.
quote
(
url_limit
,
safe
=
''
)
#
ts = str(int(datetime.datetime.now().timestamp()))
ts
=
str
(
int
(
datetime
.
datetime
.
now
()
.
timestamp
()))
ts
=
'1600650372'
#
ts = '1600650372'
url_str
=
'GET&
%
s&
%
s'
%
(
url_limit
,
ts
)
url_str
=
'GET&
%
s&
%
s'
%
(
url_limit
,
ts
)
# print(url_str)
# print(url_str)
sig_sha1
=
hmac
.
new
(
sign
.
encode
(
'utf-8'
),
url_str
.
encode
(
'utf-8'
),
digestmod
=
'SHA1'
)
sig_sha1
=
hmac
.
new
(
sign
.
encode
(
'utf-8'
),
url_str
.
encode
(
'utf-8'
),
digestmod
=
'SHA1'
)
...
@@ -135,7 +135,7 @@ class CrawlerDouban():
...
@@ -135,7 +135,7 @@ class CrawlerDouban():
ts
,
sig
=
self
.
get_sig
(
'/api/v2/group/248952/topics'
)
ts
,
sig
=
self
.
get_sig
(
'/api/v2/group/248952/topics'
)
url_dic
=
{
url_dic
=
{
# "start": None,
# "start": None,
"count"
:
"
10
0"
,
"count"
:
"
2
0"
,
"sortby"
:
"new"
,
"sortby"
:
"new"
,
# "apple": "389276ed556d40cada2e208482b51cd7",
# "apple": "389276ed556d40cada2e208482b51cd7",
# "icecream": "7b92c1aa7b531d1500c6e4905de2ca76",
# "icecream": "7b92c1aa7b531d1500c6e4905de2ca76",
...
@@ -247,7 +247,6 @@ class CrawlerDouban():
...
@@ -247,7 +247,6 @@ class CrawlerDouban():
yield
res
yield
res
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
test
=
CrawlerDouban
()
test
=
CrawlerDouban
()
url
=
'https://weibo.com/p/1644114654/home?from=page_100306&mod=TAB#place'
# releaserUrl = 'http://v.qq.com/vplus/cfa34d96d1b6609f1dccdea65b26b83d'
# releaserUrl = 'http://v.qq.com/vplus/cfa34d96d1b6609f1dccdea65b26b83d'
url_list
=
[
url_list
=
[
"https://www.douban.com/people/new_tag"
"https://www.douban.com/people/new_tag"
...
@@ -259,7 +258,7 @@ if __name__ == '__main__':
...
@@ -259,7 +258,7 @@ if __name__ == '__main__':
# for r in res:
# for r in res:
# print(r)
# print(r)
for
u
in
url_list
:
for
u
in
url_list
:
ttt
=
test
.
releaser_page_by_time
(
1
595755100232
,
1595906959333
,
u
,
output_to_es_register
=
False
,
ttt
=
test
.
releaser_page_by_time
(
1
600531200000
,
1600660917502
,
u
,
output_to_es_register
=
False
,
es_index
=
'crawler-data-raw'
,
es_index
=
'crawler-data-raw'
,
doc_type
=
'doc'
,
releaser_page_num_max
=
4000
,
allow
=
20
)
doc_type
=
'doc'
,
releaser_page_num_max
=
4000
,
allow
=
20
)
for
t
in
ttt
:
for
t
in
ttt
:
...
...
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment