Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
S
saturn
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
1
Merge Requests
1
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
alpha
saturn
Commits
4155d54d
Commit
4155d54d
authored
5 years ago
by
haowang
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
fix code
parent
f47dfb70
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
555 additions
and
2 deletions
+555
-2
__init__.py
api/management/__init__.py
+0
-0
__init__.py
api/management/commands/__init__.py
+0
-0
reply_commands.py
api/management/commands/reply_commands.py
+297
-0
reply_crawl.py
api/utils/reply_crawl.py
+256
-0
pictorial.py
api/views/pictorial.py
+2
-2
No files found.
api/management/__init__.py
0 → 100644
View file @
4155d54d
This diff is collapsed.
Click to expand it.
api/management/commands/__init__.py
0 → 100644
View file @
4155d54d
This diff is collapsed.
Click to expand it.
api/management/commands/reply_commands.py
0 → 100644
View file @
4155d54d
# 读取json文件 调用api 将数据导入库
import
os
from
datetime
import
datetime
import
json
import
requests
from
random
import
randint
from
django.core.management
import
BaseCommand
from
engine.rpc
import
rpc_invoker
from
api.utils.upload
import
upload_image
from
api.cache.cache
import
ins_cache
from
libs.user
import
get_user_by_ids
from
alpha_types.venus
import
ERROR
from
alpha_types.venus
import
GRAP_PLATFORM
from
engine.logger
import
info_logger
,
error_logger
,
logging_exception
IMAGE_SUFFIX
=
'-w'
FILE_PATH
=
'/Users/haowei/Desktop/xhs/'
class
Command
(
BaseCommand
):
"""
评论爬取
"""
user_id_start
=
241757306
# end 241806255
del_cache_keys
=
[]
create_faild_topic_list
=
[]
create_faild_pictorial_list
=
[]
top_comment_error_create
=
[]
second_topic_comments
=
[]
second_topic_error_comments
=
[]
second_pictorial_comments
=
[]
second_pictorial_error_comments
=
[]
second_reply_error_create
=
[]
def
del_cache
(
self
):
for
obj
in
self
.
del_cache_keys
:
ins_cache
.
delete
(
obj
)
def
get_random_user_id
(
self
):
# 随机获取用户ID
while
True
:
index
=
randint
(
0
,
5000
)
user_id
=
self
.
user_id_start
+
index
data
=
rpc_invoker
[
'venus/community/user/is_shadow'
](
user_id
=
user_id
)
.
unwrap
()
if
not
data
:
continue
ret
=
data
.
get
(
'user_id'
)
if
ret
:
return
user_id
def
get_user_id
(
self
,
id_
,
platform
):
# 获取用户ID 缓存记录保留用户关系
cache_key
=
'grap:{}:{}'
.
format
(
platform
,
id_
)
exist_key
=
'grap:{}:{}'
value
=
ins_cache
.
get
(
cache_key
)
user_id
=
None
if
not
value
:
while
True
:
user_id
=
self
.
get_random_user_id
()
exist
=
exist_key
.
format
(
platform
,
user_id
)
if
not
ins_cache
.
get
(
exist
):
ins_cache
.
set
(
exist
,
id_
)
self
.
del_cache_keys
.
append
(
exist
)
break
ins_cache
.
set
(
cache_key
,
user_id
)
self
.
del_cache_keys
.
append
(
exist
)
else
:
user_id
=
int
(
value
)
return
user_id
def
get_json_data_from_dir
(
self
,
is_topic
=
None
,
is_pictorial
=
None
):
# 获取目录文件数据
ret
=
[]
if
is_topic
:
file_path
=
FILE_PATH
+
'topic/'
if
is_pictorial
:
file_path
=
FILE_PATH
+
'pictorial/'
filenames
=
[]
for
_
,
_
,
names
in
os
.
walk
(
file_path
):
filenames
=
names
for
filename
in
filenames
:
ret
.
append
(
self
.
get_file_json_data
(
file_path
+
filename
))
return
ret
def
get_file_json_data
(
self
,
file
):
# 获取文件数据
data
=
None
with
open
(
file
,
'r'
)
as
f
:
content
=
f
.
read
()
if
content
.
startswith
(
u'
\ufeff
'
):
content
=
content
.
encode
(
'utf8'
)[
3
:]
.
decode
(
'utf8'
)
data
=
json
.
loads
(
content
)
return
data
def
get_image_size
(
self
,
image_url
):
# 获取图片宽高
try
:
url
=
image_url
+
IMAGE_SUFFIX
+
'?imageInfo'
response
=
requests
.
request
(
"GET"
,
url
)
info
=
response
.
json
()
return
info
.
get
(
'width'
),
info
.
get
(
'height'
)
except
Exception
as
e
:
logging_exception
()
return
None
,
None
def
image_info
(
self
,
urls
):
# 获取图片信息
ret
=
[]
for
url
in
urls
:
image_url
=
upload_image
(
url
)
while
not
image_url
:
image_url
=
upload_image
(
url
)
width
,
height
=
self
.
get_image_size
(
image_url
)
while
not
width
and
not
height
:
width
,
height
=
self
.
get_image_size
(
image_url
)
ret
.
append
(
{
'url'
:
image_url
.
replace
(
'http://alpha.gmeiapp.com/'
,
''
),
'height'
:
height
,
'width'
:
width
,
}
)
return
ret
def
revise_comments
(
self
,
comment
):
ret
=
[]
comment
[
'content'
]
=
comment
.
get
(
'comment'
)
reply
=
comment
.
pop
(
'reply'
,
None
)
if
not
reply
:
return
comment
,
ret
for
info
in
reply
:
info
[
'reply_id'
]
=
comment
.
get
(
'id'
)
info
[
'type'
]
=
comment
.
get
(
'type'
)
ret
.
append
(
info
)
return
comment
,
ret
def
create_comment
(
self
,
comment
,
platform
):
ret
=
rpc_invoker
[
'venus/community/crawl/replys'
](
data
=
[
comment
],
platform
=
platform
,
topic_id
=
comment
.
get
(
'topic_id'
),
pictorial_id
=
comment
.
get
(
'pictorial_id'
))
.
unwrap
()
if
not
ret
:
self
.
second_reply_error_create
.
append
(
comment
)
def
create_topic
(
self
,
topics
,
platform
):
count
=
0
for
topic
in
topics
[:
100
]:
count
+=
1
topic_comments
=
topic
.
pop
(
'comments'
,
None
)
images
=
topic
.
pop
(
'image'
)
topic
[
'images'
]
=
self
.
image_info
(
images
)
topic
[
'user_id'
]
=
self
.
get_user_id
(
id_
=
topic
.
get
(
'id'
),
platform
=
platform
)
topic
.
pop
(
'user'
)
print
(
'-------- topic current count: '
,
count
)
topic_obj
=
rpc_invoker
[
'venus/community/crawl/topic'
](
data
=
topic
,
platform
=
platform
,
pictorial_id
=
None
)
.
unwrap
()
if
not
topic_obj
:
self
.
create_faild_topic_list
.
append
(
topic
.
get
(
'id'
))
continue
print
(
'-------- return topic info: '
,
topic_obj
)
if
not
topic_comments
:
continue
if
platform
==
GRAP_PLATFORM
.
XIAOHONGSHU
:
for
topic_comment
in
topic_comments
:
topic_comment
[
'topic_id'
]
=
topic_obj
.
get
(
'id'
)
top_comment
,
comments
=
self
.
revise_comments
(
topic_comment
)
top_comment
[
'user_id'
]
=
self
.
get_user_id
(
id_
=
top_comment
.
get
(
'user'
)
.
get
(
'id'
),
platform
=
platform
)
top_comment
.
pop
(
'user'
)
ret
=
rpc_invoker
[
'venus/community/crawl/replys'
](
data
=
[
top_comment
],
platform
=
platform
,
topic_id
=
topic_obj
.
get
(
'id'
),
pictorial_id
=
None
)
.
unwrap
()
if
not
ret
:
self
.
top_comment_error_create
.
append
(
top_comment
)
continue
if
not
comments
:
continue
top_id
=
ret
.
get
(
'reply_ids'
)[
0
]
for
obj
in
comments
:
obj
[
'user_id'
]
=
self
.
get_user_id
(
id_
=
obj
.
get
(
'user'
)
.
get
(
'id'
),
platform
=
platform
)
obj
[
'top_id'
]
=
top_id
obj
.
pop
(
'user'
)
# rpc_invoker['venus/community/crawl/replys'](data=comments, platform=platform, topic_id=topic_id, pictorial_id=pictorial_id, top_id=top_id).unwrap()
self
.
second_topic_comments
.
extend
(
comments
)
# print('-------- topic_comments:', self.second_topic_comments)
# print('-------- top_comment_error_create:', self.top_comment_error_create)
self
.
del_cache
()
return
None
,
None
def
create_pictorial
(
self
,
pictorial
,
platform
):
topics
=
[]
if
not
pictorial
:
return
None
,
None
pictorial_comments
=
pictorial
.
pop
(
'comments'
,
None
)
images
=
self
.
image_info
(
pictorial
.
pop
(
'image'
))
index
=
0
for
obj
in
images
:
index
+=
1
topics
.
append
({
'id'
:
pictorial
.
get
(
'id'
)
+
str
(
index
),
'content'
:
pictorial
.
get
(
'content'
),
'images'
:
[
obj
],
'create_time'
:
pictorial
.
get
(
'create_time'
),
'user_id'
:
self
.
get_user_id
(
id_
=
obj
.
get
(
'url'
),
platform
=
platform
)
})
pictorial
[
'user_id'
]
=
self
.
get_user_id
(
id_
=
pictorial
.
get
(
'id'
),
platform
=
platform
)
pictorial
[
'description'
]
=
pictorial
.
get
(
'content'
)
# 榜单名称取爬取内容的前20字符
index_end
=
20
if
len
(
pictorial
.
get
(
'content'
))
<
index_end
:
index_end
=
len
(
pictorial
.
get
(
'content'
))
-
1
pictorial
[
'name'
]
=
pictorial
.
get
(
'content'
)[:
index_end
]
pictorial_obj
=
rpc_invoker
[
'venus/community/crawl/pictorial'
](
data
=
pictorial
,
platform
=
platform
)
.
unwrap
()
if
not
pictorial_obj
:
self
.
create_faild_pictorial_list
.
append
(
pictorial
)
pictorial_id
=
pictorial_obj
.
get
(
'id'
)
if
topics
:
for
obj
in
topics
:
rpc_invoker
[
'venus/community/crawl/topic'
](
data
=
obj
,
platform
=
platform
,
pictorial_id
=
pictorial_id
)
.
unwrap
()
if
pictorial_comments
:
if
platform
==
GRAP_PLATFORM
.
XIAOHONGSHU
:
for
pictorial_comment
in
pictorial_comments
:
pictorial_comment
[
'pictorial_id'
]
=
pictorial_id
top_comment
,
comments
=
self
.
revise_comments
(
pictorial_comment
)
top_comment
[
'user_id'
]
=
self
.
get_user_id
(
id_
=
top_comment
.
get
(
'user'
)
.
get
(
'id'
),
platform
=
platform
)
top_comment
.
pop
(
'user'
)
ret
=
rpc_invoker
[
'venus/community/crawl/replys'
](
data
=
[
top_comment
],
platform
=
platform
,
pictorial_id
=
pictorial_id
)
.
unwrap
()
if
not
ret
:
self
.
top_comment_error_create
.
append
(
top_comment
)
continue
if
not
comments
:
continue
top_id
=
ret
.
get
(
'reply_ids'
)[
0
]
for
obj
in
comments
:
obj
[
'user_id'
]
=
self
.
get_user_id
(
id_
=
obj
.
get
(
'user'
)
.
get
(
'id'
),
platform
=
platform
)
obj
[
'top_id'
]
=
top_id
obj
.
pop
(
'user'
)
# rpc_invoker['venus/community/crawl/replys'](data=comments, platform=platform, topic_id=topic_id, pictorial_id=pictorial_id, top_id=top_id).unwrap()
self
.
second_pictorial_comments
.
extend
(
comments
)
return
None
,
None
def
handle
(
self
,
*
args
,
**
options
):
platform
=
4
# 帖子
print
(
'----- start deal topic at {} -----'
.
format
(
datetime
.
now
()
.
strftime
(
'
%
Y-
%
m-
%
d
%
H:
%
M:
%
S
%
f'
)))
topic_data
=
self
.
get_json_data_from_dir
(
is_topic
=
1
)
self
.
create_topic
(
topics
=
topic_data
,
platform
=
platform
)
print
(
'-------- create_faild_topic_list:'
,
len
(
self
.
create_faild_topic_list
))
print
(
'-------- second_topic_comments:'
,
len
(
self
.
second_topic_comments
))
print
(
'----- end deal topic at {} -----'
.
format
(
datetime
.
now
()
.
strftime
(
'
%
Y-
%
m-
%
d
%
H:
%
M:
%
S
%
f'
)))
# 榜单
print
(
'----- start deal pictorial at {} -----'
.
format
(
datetime
.
now
()
.
strftime
(
'
%
Y-
%
m-
%
d
%
H:
%
M:
%
S
%
f'
)))
pictorial_data
=
self
.
get_json_data_from_dir
(
is_pictorial
=
1
)
count
=
0
for
pictorial
in
pictorial_data
[:
50
]:
count
+=
1
print
(
'------- current pictorial count :'
,
count
)
self
.
create_pictorial
(
pictorial
=
pictorial
,
platform
=
platform
)
print
(
'-------- create_faild_pictorial_list:'
,
len
(
self
.
create_faild_pictorial_list
))
print
(
'-------- top_comment_error_create:'
,
len
(
self
.
top_comment_error_create
))
print
(
'----- end deal pictorial at {} -----'
.
format
(
datetime
.
now
()
.
strftime
(
'
%
Y-
%
m-
%
d
%
H:
%
M:
%
S
%
f'
)))
# 二级评论
print
(
'----- start deal second topic reply at {} -----'
.
format
(
datetime
.
now
()
.
strftime
(
'
%
Y-
%
m-
%
d
%
H:
%
M:
%
S
%
f'
)))
count
=
0
for
topic_comment
in
self
.
second_topic_comments
[:
10
]:
count
+=
1
print
(
'------- current second topic reply count :'
,
count
)
self
.
create_comment
(
comment
=
topic_comment
,
platform
=
platform
)
print
(
'-------- second_reply_error_create:'
,
len
(
self
.
second_reply_error_create
))
print
(
'----- end deal second topic reply at {} -----'
.
format
(
datetime
.
now
()
.
strftime
(
'
%
Y-
%
m-
%
d
%
H:
%
M:
%
S
%
f'
)))
if
len
(
self
.
top_comment_error_create
)
>
0
:
print
(
'-------- second_topic_comments:'
,
len
(
self
.
second_topic_comments
))
if
len
(
self
.
create_faild_topic_list
)
>
0
:
print
(
'-------- create_faild_topic_list:'
,
len
(
self
.
create_faild_topic_list
))
if
len
(
self
.
create_faild_pictorial_list
)
>
0
:
print
(
'-------- create_faild_pictorial_list:'
,
len
(
self
.
create_faild_pictorial_list
))
self
.
del_cache
()
This diff is collapsed.
Click to expand it.
api/utils/reply_crawl.py
0 → 100644
View file @
4155d54d
# 读取json文件 调用api 将数据导入库
import
os
from
datetime
import
datetime
import
json
import
requests
from
random
import
randint
from
api.views.base_view
import
BaseView
from
api.utils.sensitive
import
Sensitive
from
api.utils.upload
import
upload_image
from
api.cache.cache
import
ins_cache
from
libs.user
import
get_user_by_ids
from
alpha_types.venus
import
ERROR
from
alpha_types.venus
import
GRAP_PLATFORM
from
engine.logger
import
info_logger
,
error_logger
,
logging_exception
FILE_PATH
=
'/Users/haowei/Desktop/xhs/'
API_URL_PROD
=
'http://saturn.iyanzhi.com/api/v1/pictorial/create'
API_URL_TEST
=
'http://saturn.gmapp.env/api/v1/pictorial/create'
API_URL_BENDI
=
'http://127.0.0.1:8004/api/v1/pictorial/create'
class
ReplyBatchCreate
(
BaseView
):
"""
评论爬取
"""
user_id_start
=
241757306
# end 241806255
del_cache_keys
=
[]
first_replys
=
[]
second_replys
=
[]
create_faild_topic_list
=
[]
total_topic_comments
=
[]
total_pictorial_comments
=
[]
def
del_cache
(
self
):
for
obj
in
self
.
del_cache_keys
:
ins_cache
.
delete
(
obj
)
def
get_random_user_id
(
self
):
# 随机获取用户ID
while
True
:
index
=
randint
(
0
,
5000
)
user_id
=
self
.
user_id_start
+
index
error
,
data
=
self
.
call_rpc
(
'venus/community/user/is_shadow'
,
user_id
=
user_id
)
ret
=
data
.
get
(
'user_id'
)
if
ret
:
return
user_id
def
get_user_id
(
self
,
id_
,
platform
):
# 获取用户ID 缓存记录保留用户关系
cache_key
=
'grap:{}:{}'
.
format
(
platform
,
id_
)
exist_key
=
'grap:{}:{}'
value
=
ins_cache
.
get
(
cache_key
)
user_id
=
None
if
not
value
:
while
True
:
user_id
=
self
.
get_random_user_id
()
exist
=
exist_key
.
format
(
platform
,
user_id
)
if
not
ins_cache
.
get
(
exist
):
ins_cache
.
set
(
exist
,
id_
)
self
.
del_cache_keys
.
append
(
exist
)
break
ins_cache
.
set
(
cache_key
,
user_id
)
self
.
del_cache_keys
.
append
(
exist
)
else
:
user_id
=
int
(
value
)
return
user_id
def
get_json_data_from_dir
(
self
,
is_topic
=
None
,
is_pictorial
=
None
):
# 获取目录文件数据
ret
=
[]
if
is_topic
:
file_path
=
FILE_PATH
+
'topic/'
if
is_pictorial
:
file_path
=
FILE_PATH
+
'pictorial/'
filenames
=
[]
for
_
,
_
,
names
in
os
.
walk
(
file_path
):
filenames
=
names
for
filename
in
filenames
:
ret
.
append
(
self
.
get_file_json_data
(
file_path
+
filename
))
return
ret
def
get_file_json_data
(
self
,
file
):
# 获取文件数据
data
=
None
with
open
(
file
,
'r'
)
as
f
:
content
=
f
.
read
()
if
content
.
startswith
(
u'
\ufeff
'
):
content
=
content
.
encode
(
'utf8'
)[
3
:]
.
decode
(
'utf8'
)
data
=
json
.
loads
(
content
)
return
data
def
get_image_size
(
self
,
image_url
):
# 获取图片宽高
try
:
url
=
image_url
+
IMAGE_SUFFIX
+
'?imageInfo'
response
=
requests
.
request
(
"GET"
,
url
)
info
=
response
.
json
()
return
info
.
get
(
'width'
),
info
.
get
(
'height'
)
except
Exception
as
e
:
logging_exception
()
return
None
def
image_info
(
self
,
urls
):
# 获取图片信息
ret
=
[]
for
url
in
urls
:
image_url
=
upload_image
(
url
)
while
not
image_url
:
image_url
=
upload_image
(
url
)
width
,
height
=
self
.
get_image_size
(
image_url
)
while
not
width
and
not
height
:
width
,
height
=
self
.
get_image_size
(
image_url
)
ret
.
append
(
{
'url'
:
image_url
.
replace
(
'http://alpha.gmeiapp.com/'
,
''
),
'height'
:
height
,
'width'
:
width
,
}
)
return
ret
def
revise_comments
(
self
,
comment
,
from_id
):
ret
=
[]
comment
[
'from_id'
]
=
from_id
comment
[
'content'
]
=
comment
.
get
(
'comment'
)
reply
=
comment
.
pop
(
'reply'
,
None
)
if
not
reply
:
return
comment
,
ret
for
info
in
reply
:
info
[
'from_id'
]
=
comment
.
get
(
'from_id'
)
info
[
'reply_id'
]
=
comment
.
get
(
'id'
)
info
[
'type'
]
=
comment
.
get
(
'type'
)
ret
.
append
(
info
)
return
comment
,
ret
def
create_comment
(
self
,
comment_list
,
from_id
,
platform
,
topic_id
=
None
,
pictorial_id
=
None
):
top_comment
,
comments
=
self
.
revise_comments
(
comment_list
,
from_id
)
top_comment
[
'user_id'
]
=
self
.
get_user_id
(
id_
=
top_comment
.
get
(
'user'
)
.
get
(
'id'
),
platform
=
platform
)
top_comment
.
pop
(
'user'
)
error
,
ret
=
self
.
call_rpc
(
'venus/community/crawl/replys'
,
data
=
[
top_comment
],
platform
=
platform
,
topic_id
=
topic_id
,
pictorial_id
=
pictorial_id
)
if
error
:
return
error
,
ret
top_id
=
ret
.
get
(
'reply_ids'
)[
0
]
for
obj
in
comments
:
obj
[
'user_id'
]
=
self
.
get_user_id
(
id_
=
obj
.
get
(
'user'
)
.
get
(
'id'
),
platform
=
platform
)
obj
.
pop
(
'user'
)
error
,
_
=
self
.
call_rpc
(
'venus/community/crawl/replys'
,
data
=
comments
,
platform
=
platform
,
topic_id
=
topic_id
,
pictorial_id
=
pictorial_id
,
top_id
=
top_id
)
if
error
:
return
error
,
None
return
None
,
None
def
create_topic
(
self
,
topics
,
platform
):
for
topic
in
topics
:
topic_comments
=
topic
.
pop
(
'comments'
,
None
)
images
=
topic
.
pop
(
'image'
)
topic
[
'images'
]
=
self
.
image_info
(
images
)
topic
[
'user_id'
]
=
self
.
get_user_id
(
id_
=
topic
.
get
(
'id'
),
platform
=
platform
)
error
,
topic_obj
=
self
.
call_rpc
(
'venus/community/crawl/topic'
,
data
=
topic
,
platform
=
platform
,
pictorial_id
=
None
)
if
error
:
print
(
error
,
topic
.
get
(
'id'
))
self
.
create_faild_topic_list
.
append
(
topic
.
get
(
'id'
))
continue
if
not
topic_comments
:
continue
from_id
=
topic
.
get
(
'id'
)
if
platform
==
GRAP_PLATFORM
.
XIAOHONGSHU
:
for
topic_comment
in
topic_comments
:
topic_comment
[
'topic_id'
]
=
topic_obj
.
get
(
'id'
)
self
.
total_topic_comments
.
append
(
topic_comments
)
return
None
,
None
def
create_pictorial
(
self
,
pictorial
,
platform
):
topics
=
[]
pictorial_id
=
None
if
not
pictorial
:
return
None
,
None
pictorial_comments
=
pictorial
.
pop
(
'comments'
,
None
)
images
=
self
.
image_info
(
pictorial
.
pop
(
'image'
))
index
=
0
for
obj
in
images
:
index
+=
1
topics
.
append
({
'id'
:
pictorial
.
get
(
'id'
)
+
str
(
index
),
'content'
:
pictorial
.
get
(
'content'
),
'images'
:
[
obj
],
'create_time'
:
pictorial
.
get
(
'create_time'
),
'user_id'
:
self
.
get_user_id
(
id_
=
obj
.
get
(
'url'
),
platform
=
platform
)
})
pictorial
[
'user_id'
]
=
self
.
get_user_id
(
id_
=
pictorial
.
get
(
'id'
),
platform
=
platform
)
pictorial
[
'description'
]
=
pictorial
.
get
(
'content'
)
# 榜单名称取爬取内容的前20字符
index_end
=
20
if
len
(
pictorial
.
get
(
'content'
))
<
index_end
:
index_end
=
len
(
pictorial
.
get
(
'content'
))
-
1
pictorial
[
'name'
]
=
pictorial
.
get
(
'content'
)[:
index_end
]
error
,
pictorial_obj
=
self
.
call_rpc
(
'venus/community/crawl/pictorial'
,
data
=
pictorial
,
platform
=
platform
)
if
error
:
return
error
,
None
pictorial_id
=
pictorial_obj
.
get
(
'id'
)
if
topics
:
for
obj
in
topics
:
error
,
_
=
self
.
call_rpc
(
'venus/community/crawl/topic'
,
data
=
obj
,
platform
=
platform
,
pictorial_id
=
pictorial_id
)
if
error
:
return
error
,
None
if
pictorial_comments
:
if
platform
==
GRAP_PLATFORM
.
XIAOHONGSHU
:
for
pictorial_comment
in
pictorial_comments
:
pictorial_comment
[
'pictorial_id'
]
=
pictorial_id
self
.
total_pictorial_comments
.
append
(
pictorial_comments
)
return
None
,
None
def
process
(
self
):
platform
=
4
# 帖子
print
(
'----- start deal topic at {} -----'
.
format
(
datetime
.
now
()
.
strftime
(
'
%
Y-
%
m-
%
d
%
H:
%
M:
%
S
%
f'
)))
topic_data
=
self
.
get_json_data_from_dir
(
is_topic
=
1
)
for
info
in
topic_data
:
print
(
info
)
# self.create_topic(topics=info, platform=platform)
# self.del_cache()
print
(
'----- end deal topic at {} -----'
.
format
(
datetime
.
now
()
.
strftime
(
'
%
Y-
%
m-
%
d
%
H:
%
M:
%
S
%
f'
)))
# 榜单
# print('----- start deal pictorial at {} -----'.format(datetime.now().strftime('%Y-%m-%d %H:%M:%S %f')))
# pictorial_data = self.get_json_data_from_dir(is_pictorial=1)
# print('----- end deal pictorial at {} -----'.format(datetime.now().strftime('%Y-%m-%d %H:%M:%S %f')))
# if is_pictorial:
# for obj in topics:
# error, _ = self.create_pictorial(pictorial=obj, platform=platform)
# if error:
# self.del_cache()
# return self.error(error=error)
self
.
del_cache
()
return
self
.
ok
()
if
__name__
==
"__main__"
:
a
=
ReplyBatchCreate
()
a
.
process
()
This diff is collapsed.
Click to expand it.
api/views/pictorial.py
View file @
4155d54d
...
@@ -132,8 +132,8 @@ class CreatePictorial(BaseView):
...
@@ -132,8 +132,8 @@ class CreatePictorial(BaseView):
if
platform
==
GRAP_PLATFORM
.
XIAOHONGSHU
:
if
platform
==
GRAP_PLATFORM
.
XIAOHONGSHU
:
for
topic_comment
in
topic_comments
:
for
topic_comment
in
topic_comments
:
error
,
_
=
self
.
create_comment
(
comment_list
=
topic_comment
,
from_id
=
from_id
,
platform
=
platform
,
topic_id
=
topic_obj
.
get
(
'id'
))
error
,
_
=
self
.
create_comment
(
comment_list
=
topic_comment
,
from_id
=
from_id
,
platform
=
platform
,
topic_id
=
topic_obj
.
get
(
'id'
))
if
error
:
if
error
:
return
error
,
_
return
error
,
_
return
None
,
None
return
None
,
None
...
...
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment