Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
S
saturn
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
1
Merge Requests
1
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
alpha
saturn
Commits
a92fd9f9
Commit
a92fd9f9
authored
Dec 28, 2018
by
zhanglu
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'test' into 'master'
Test See merge request
!1
parents
5792ffbc
db6692de
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
192 additions
and
7 deletions
+192
-7
sensitive.py
api/utils/sensitive.py
+104
-0
topic.py
api/views/topic.py
+35
-7
spawn_task.py
libs/spawn_task.py
+53
-0
No files found.
api/utils/sensitive.py
0 → 100644
View file @
a92fd9f9
"""敏感词等反垃圾相关。"""
from
engine.rpc
import
get_current_rpc_invoker
from
libs.spawn_task
import
SpawnTask
,
Task
class DRIVERS(object):
    """Identifiers of the content-detection services that can be used.

    Only one service is wired in for now; more may be added later.
    """

    YIDUN = "1"  # the "Yidun" (易盾) service
class ENDPOINT(object):
    """Lookup table from a detection driver id to its RPC entry point."""

    # Every request is routed through the antispam service.
    endpoint = {
        DRIVERS.YIDUN: "antispam/yidun",
    }

    def __getitem__(self, key):
        """Return the RPC path registered for driver ``key``.

        Raises:
            Exception: if no detection platform is registered for ``key``.
        """
        try:
            path = self.endpoint[key]
        except KeyError:
            raise Exception("没有对应的检测平台")
        else:
            return path
class Sensitive(object):
    """Entry point for sensitive-word / anti-spam checks.

    ``check`` fans the texts out as concurrent RPC calls and maps every text
    to the parsed verdict of the selected detection driver.
    """

    # Resolves a driver id to the RPC endpoint that serves it.
    endpoint = ENDPOINT()

    @classmethod
    def _check_job(cls, endpoint, text):
        """Run a single content check over RPC.

        Args:
            endpoint: RPC path of the detection service.
            text: the content to check.

        Returns:
            The unwrapped RPC response, or ``None`` when the call fails.
            The check is best effort: a failing call must not break callers.
        """
        rpc = get_current_rpc_invoker()
        try:
            return rpc[endpoint](text=text).unwrap()
        except Exception:
            # Deliberately not a bare ``except``: BaseException-derived
            # signals (KeyboardInterrupt, greenlet kill requests, ...) must
            # keep propagating instead of being turned into "no result".
            return None

    @classmethod
    def check(cls, text_list, driver=DRIVERS.YIDUN, detail=False):
        """Check a batch of texts concurrently.

        Args:
            text_list: iterable of texts to check.
            driver: detection driver id, one of ``DRIVERS``.
            detail: when true, each value is the list of matched words
                instead of a boolean.

        Returns:
            dict mapping each text to the parser's verdict (see
            ``_yidun_paraser``). Duplicate texts collapse into one key.
            A falsy verdict means nothing was flagged or the check could
            not be completed.

        Raises:
            Exception: if ``driver`` has no registered endpoint.
        """
        endpoint = cls.endpoint[driver]

        # Materialize once: the texts are walked twice (spawn + pairing), so
        # a one-shot iterable would otherwise produce an empty result.
        texts = list(text_list)

        spawn = SpawnTask([Task(cls._check_job, endpoint, text) for text in texts])
        spawn.run()

        result = {}
        if driver == DRIVERS.YIDUN:
            for text, res in zip(texts, spawn.result):
                # Forward ``detail``; it used to be hard-coded to False,
                # which made the parameter a no-op.
                result[text] = cls._yidun_paraser(res, detail)
        return result

    @classmethod
    def _yidun_paraser(cls, data, detail=False):
        """Parse a Yidun response.

        Expected payload shape (sample):

            {
                'code': 200,
                'msg': 'ok',
                'result': {
                    'taskId': 'd28e036d95874361916a26d406ea7db9',
                    'action': 2,
                    'labels': [
                        {'label': 600, 'level': 2, 'details': {'hint': ['...']}}
                    ]
                }
            }

        Args:
            data: response of ``_check_job``; may be ``None``.
            detail: return the matched words instead of a boolean.

        Returns:
            ``None`` when the check failed (empty response, a response that
            carries a "detail" key, or one without a usable "result");
            ``[]`` when the action is 0 (passed); otherwise the list of
            hints if ``detail`` is true, else whether any hint was found.
        """
        if not data or "detail" in data:
            # The service reported a failure or never answered.
            return None

        result = data.get("result")
        if not result or "action" not in result:
            # Malformed payload: treat it like a failed check instead of
            # raising KeyError, in line with the best-effort policy above.
            return None

        if result["action"] == 0:
            # Passed.
            return []

        hints = []
        for label in result.get("labels") or []:
            # "details" / "hint" may be absent; never extend with None.
            hints.extend((label.get("details") or {}).get("hint") or [])

        # Does the caller want the detailed word list?
        return hints if detail else bool(hints)
api/views/topic.py
View file @
a92fd9f9
import
json
from
api.views.base_view
import
BaseView
from
api.utils.sensitive
import
Sensitive
class
CreateTopicForBatch
(
BaseView
):
...
...
@@ -19,26 +19,54 @@ class CreateTopicForBatch(BaseView):
if
not
topic_list
:
return
self
.
ok
()
topics
=
[]
# 敏感词检测,获取可用的帖子
check_info
=
Sensitive
.
check
([
topic
[
"content"
]
for
topic
in
topic_list
if
topic
.
get
(
"content"
)])
for
topic
in
topic_list
:
succ
=
check_info
.
get
(
topic
.
get
(
"content"
))
if
topic
.
get
(
"content"
)
else
True
if
not
succ
:
topics
.
append
(
topic
)
if
not
topics
:
return
self
.
ok
()
tag_names
=
[]
for
item
in
topic_list
:
tag_names
.
extend
(
item
.
get
(
"tags"
,
[]))
for
item
in
topics
:
tags
=
item
.
get
(
"tags"
)
or
[]
tag_names
.
extend
([
tag
.
replace
(
"#"
,
''
)
.
strip
()
for
tag
in
tags
])
item
[
"user_id"
]
=
user_id
check_info
=
Sensitive
.
check
(
tag_names
)
tags
=
[
tag_name
for
tag_name
,
succ
in
check_info
.
items
()
if
not
succ
]
# 先创建标签
_tag_error
,
_tag_data
=
self
.
call_rpc
(
"venus/community/tag/batch_create_tag_by_name"
,
tag_names
=
list
(
filter
(
None
,
set
(
tag_names
)))
tag_names
=
tags
)
if
_tag_error
:
return
self
.
error
(
_tag_error
)
# 更新发帖
for
item
in
topic_list
:
item
[
"tag_ids"
]
=
[
_tag_data
.
get
(
tag_name
,
0
)
for
tag_name
in
item
.
get
(
"tags"
,
[])]
# 处理标签,将文本中的标签处理成现有标签
for
item
in
topics
:
tags
=
item
.
get
(
"tags"
)
or
[]
tags
=
[
tag
.
replace
(
"#"
,
''
)
.
strip
()
for
tag
in
tags
]
content
=
item
[
"content"
]
for
tag_name
,
tag_id
in
_tag_data
.
items
():
if
tag_name
in
tags
:
alpha_tag
=
'<topic>{'
+
'"id":{},"name":"{}"'
.
format
(
tag_id
,
tag_name
)
+
'}</topic>'
content
=
content
.
replace
(
'#'
+
tag_name
,
alpha_tag
)
item
[
"content"
]
=
content
.
replace
(
'#'
,
''
)
item
[
"tag_ids"
]
=
[
_tag_data
[
tag_name
]
for
tag_name
in
tags
if
_tag_data
.
get
(
tag_name
)
]
create_err
,
result
=
self
.
call_rpc
(
"venus/community/topic/batch_create_for_inner"
,
topic_list
=
topic
_list
topic_list
=
topic
s
)
if
create_err
:
return
self
.
error
(
create_err
)
...
...
libs/spawn_task.py
0 → 100644
View file @
a92fd9f9
"""利用gevent并发网络请求"""
import
gevent
from
gevent
import
monkey
# Replace the standard socket module with gevent's cooperative version so
# blocking network calls inside spawned greenlets yield to other greenlets.
# NOTE(review): this runs as a side effect of importing this module and only
# patches `socket`; confirm it executes early enough for the RPC client in use.
monkey.patch_socket()
class Task(object):
    """A deferred call: a callable bundled with the arguments to invoke it with."""

    def __init__(self, serve, *args, **kwargs):
        """Remember ``serve`` and the positional/keyword arguments for it."""
        self.serve, self.args, self.kwargs = serve, args, kwargs
class SpawnTask(object):
    """Run a batch of tasks concurrently as gevent greenlets.

    Usage: construct with the tasks, call ``run()``, then read ``result``.
    """

    # Default number of seconds ``run()`` waits for the whole batch.
    TIMEOUT = 5

    def __init__(self, tasks, timeout=None):
        """Store the tasks to run.

        Args:
            tasks: list of objects exposing ``serve``, ``args`` and
                ``kwargs`` (see ``Task``), for example:

                    tasks = [
                        Task(get_sensitive, text1),
                        Task(get_sensitive, text2),
                        Task(get_sensitive, text3),
                    ]
            timeout: seconds to wait for the batch in ``run()``. ``None``
                (the default) falls back to the class-level ``TIMEOUT``.
        """
        self.tasks = tasks
        self.timeout = timeout
        self.jobs = []

    def run(self):
        """Spawn one greenlet per task, then wait for the batch.

        The wait is bounded by the timeout and applies to the batch as a
        whole, not to each task individually.
        """
        self._spawn()
        self._joinall()

    @property
    def result(self):
        """Values of the spawned jobs, in task order.

        A job that has not finished, or that failed, contributes ``None``.
        Before ``run()`` is called the list is empty.
        """
        return [job.value for job in self.jobs]

    def _spawn(self):
        # Start one greenlet per task.
        self.jobs = [
            gevent.spawn(task.serve, *task.args, **task.kwargs)
            for task in self.tasks
        ]

    def _joinall(self):
        # Resolve the timeout lazily so overriding TIMEOUT still takes effect.
        timeout = self.TIMEOUT if self.timeout is None else self.timeout
        # NOTE: joinall only stops waiting after ``timeout``; greenlets that
        # are still running are not killed here.
        gevent.joinall(self.jobs, timeout=timeout)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment