Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
S
saturn
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
赵磊
saturn
Commits
9df8c75a
Commit
9df8c75a
authored
Aug 23, 2019
by
zhongshangwu
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
微博内容入库增加一级评论限制
parent
73481de5
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
5 additions
and
14 deletions
+5
-14
weibo_commands.py
api/management/commands/weibo_commands.py
+5
-14
No files found.
api/management/commands/weibo_commands.py
View file @
9df8c75a
...
@@ -17,11 +17,6 @@ from engine.logger import info_logger, error_logger, logging_exception
...
@@ -17,11 +17,6 @@ from engine.logger import info_logger, error_logger, logging_exception
IMAGE_SUFFIX
=
'-w'
IMAGE_SUFFIX
=
'-w'
FILE_PATH
=
'/Users/zhongshangwu/workspace/gengmei/like/saturn/weibo/'
FILE_PATH
=
'/Users/zhongshangwu/workspace/gengmei/like/saturn/weibo/'
# TODO
# 1. 图片裁剪上传
# 2. 过滤二级带图评论
# 3. 其他的过滤规则
#
class
Command
(
BaseCommand
):
class
Command
(
BaseCommand
):
user_id_start
=
241757306
# end 241806255
user_id_start
=
241757306
# end 241806255
...
@@ -37,7 +32,6 @@ class Command(BaseCommand):
...
@@ -37,7 +32,6 @@ class Command(BaseCommand):
def
get_random_user_id
(
self
):
def
get_random_user_id
(
self
):
# 随机获取马甲用户ID
# 随机获取马甲用户ID
# return 241759142
while
True
:
while
True
:
index
=
randint
(
0
,
5000
)
index
=
randint
(
0
,
5000
)
user_id
=
self
.
user_id_start
+
index
user_id
=
self
.
user_id_start
+
index
...
@@ -171,7 +165,6 @@ class Command(BaseCommand):
...
@@ -171,7 +165,6 @@ class Command(BaseCommand):
weibo_user_id
=
self
.
get_weibo_id
(
pictorial
),
weibo_user_id
=
self
.
get_weibo_id
(
pictorial
),
platform
=
platform
platform
=
platform
)
)
# print("Pictorial user id:", pictorial['user_id'])
# 榜单名称取爬取内容的前20字符
# 榜单名称取爬取内容的前20字符
index_end
=
20
index_end
=
20
if
len
(
pictorial
.
get
(
'content'
))
<
index_end
:
if
len
(
pictorial
.
get
(
'content'
))
<
index_end
:
...
@@ -181,16 +174,13 @@ class Command(BaseCommand):
...
@@ -181,16 +174,13 @@ class Command(BaseCommand):
pictorial
[
'description'
]
=
pictorial
.
get
(
'content'
)
pictorial
[
'description'
]
=
pictorial
.
get
(
'content'
)
weibo_comments
=
pictorial
.
pop
(
'comments'
,
None
)
# --> 微博评论
weibo_comments
=
pictorial
.
pop
(
'comments'
,
None
)
# --> 微博评论
topics
=
[]
# 一级带图评论 转化为内部的帖子
topic_count
=
0
pictorial_comments
=
[]
# 一级无图评论 转化为榜单的评论
first_pictorial_commennts
=
0
# RPC 调用创建榜单
# RPC 调用创建榜单
pictorial_obj
=
rpc_invoker
[
'venus/community/crawl/pictorial'
](
data
=
pictorial
,
platform
=
platform
)
.
unwrap
()
pictorial_obj
=
rpc_invoker
[
'venus/community/crawl/pictorial'
](
data
=
pictorial
,
platform
=
platform
)
.
unwrap
()
if
not
pictorial_obj
:
if
not
pictorial_obj
:
self
.
create_faild_pictorial_list
.
append
(
pictorial
)
self
.
create_faild_pictorial_list
.
append
(
pictorial
)
return
None
return
None
pictorial_id
=
pictorial_obj
.
get
(
'id'
)
pictorial_id
=
pictorial_obj
.
get
(
'id'
)
self
.
stats
[
weibo_id
]
=
{
self
.
stats
[
weibo_id
]
=
{
"topics"
:
{},
"topics"
:
{},
...
@@ -225,8 +215,6 @@ class Command(BaseCommand):
...
@@ -225,8 +215,6 @@ class Command(BaseCommand):
self
.
create_faild_topic_list
.
append
(
topic
.
get
(
'id'
))
self
.
create_faild_topic_list
.
append
(
topic
.
get
(
'id'
))
else
:
else
:
# 创建帖子评论
# 创建帖子评论
# for topic_coment in replies:
# topic_coment["topic_id"] = topic_obj.get("id")
self
.
stats
[
weibo_id
][
"topics"
][
comment
[
'id'
]]
=
{
self
.
stats
[
weibo_id
][
"topics"
][
comment
[
'id'
]]
=
{
"reply"
:
[]
"reply"
:
[]
}
}
...
@@ -240,6 +228,9 @@ class Command(BaseCommand):
...
@@ -240,6 +228,9 @@ class Command(BaseCommand):
}
}
else
:
# -> to pictorial comment
else
:
# -> to pictorial comment
if
len
(
self
.
stats
[
weibo_id
][
"first_comments"
])
>
50
:
continue
top_comments_obj
=
rpc_invoker
[
'venus/community/crawl/replys'
](
data
=
[
comment
],
platform
=
platform
,
pictorial_id
=
pictorial_id
)
.
unwrap
()
top_comments_obj
=
rpc_invoker
[
'venus/community/crawl/replys'
](
data
=
[
comment
],
platform
=
platform
,
pictorial_id
=
pictorial_id
)
.
unwrap
()
if
not
top_comments_obj
.
get
(
"reply_ids"
):
if
not
top_comments_obj
.
get
(
"reply_ids"
):
self
.
top_pictorial_error_comments
.
append
(
comment
)
self
.
top_pictorial_error_comments
.
append
(
comment
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment