Commit a92fd9f9 authored by zhanglu

Merge branch 'test' into 'master'

Test

See merge request alpha/saturn!1
parents 5792ffbc db6692de
"""敏感词等反垃圾相关。"""
from engine.rpc import get_current_rpc_invoker
from libs.spawn_task import SpawnTask, Task
class DRIVERS(object):
    """Identifiers of the available detection services.

    More services may be plugged in later.
    """

    # Yidun ("易盾")
    YIDUN = "1"
class ENDPOINT(object):
    """Request entry points, looked up by driver identifier."""

    # Every request is routed into the antispam service.
    endpoint = {
        DRIVERS.YIDUN: "antispam/yidun",
    }

    def __getitem__(self, key):
        """Return the endpoint path registered for ``key``.

        Raises:
            Exception: if no endpoint is registered for ``key``.
        """
        try:
            path = self.endpoint[key]
        except KeyError:
            raise Exception("没有对应的检测平台")
        else:
            return path
class Sensitive(object):
    """Entry point for sensitive-word / anti-spam checks."""

    endpoint = ENDPOINT()

    @classmethod
    def _check_job(cls, endpoint, text):
        """Run a single content check over RPC.

        Args:
            endpoint: RPC path of the detection service.
            text: the text to check.

        Returns:
            The unwrapped RPC response, or ``None`` when the call failed.
        """
        import logging

        rpc = get_current_rpc_invoker()
        try:
            return rpc[endpoint](text=text).unwrap()
        except Exception:
            # Best effort: a failed check is reported as None rather than
            # raised.  Only ``Exception`` is caught so that GreenletExit,
            # KeyboardInterrupt and SystemExit still propagate.
            logging.getLogger(__name__).exception(
                "sensitive check failed for endpoint %s", endpoint
            )
            return None

    @classmethod
    def check(cls, text_list, driver=DRIVERS.YIDUN, detail=False):
        """Check several texts concurrently.

        Args:
            text_list: iterable of texts to check.
            driver: identifier of the detection service to use.
            detail: when true, each value is the list of matched hints
                instead of a boolean.

        Returns:
            A dict mapping each distinct text to the parsed result of its
            check (see ``_yidun_paraser`` for the possible values).

        Raises:
            Exception: if ``driver`` has no registered endpoint.
        """
        endpoint = cls.endpoint[driver]

        # Materialise the input once (it may be a one-shot iterable) and
        # drop duplicates: the result is keyed by text, so checking the
        # same text twice only costs an extra remote call.
        texts = []
        seen = set()
        for text in text_list:
            if text not in seen:
                seen.add(text)
                texts.append(text)

        spawn = SpawnTask([Task(cls._check_job, endpoint, text) for text in texts])
        spawn.run()

        result = {}
        if driver == DRIVERS.YIDUN:
            for text, res in zip(texts, spawn.result):
                result[text] = cls._yidun_paraser(res, detail)
        return result

    @classmethod
    def _yidun_paraser(cls, data, detail=False):
        """Parse a Yidun response.

        Example of the payload this parser reads:

            {
                'code': 200,
                'msg': 'ok',
                'result': {
                    'taskId': 'd28e036d95874361916a26d406ea7db9',
                    'action': 2,
                    'labels': [
                        {
                            'label': 600,
                            'level': 2,
                            'details': {'hint': ['<matched word>']}
                        }
                    ]
                }
            }

        Args:
            data: the response returned by ``_check_job`` (may be ``None``).
            detail: return the list of matched hints instead of a boolean.

        Returns:
            ``None`` when the check itself failed (empty response, a
            response carrying a "detail" key, or no ``result.action``);
            ``[]`` when ``action`` is 0 (passed); otherwise the hint list
            if ``detail`` is true, else ``True``/``False`` depending on
            whether any hint was reported.
        """
        if not data or "detail" in data:  # the call failed
            return None

        verdict = data.get("result") or {}
        if "action" not in verdict:
            # Response without a verdict: treat it as a failed check
            # instead of raising KeyError.
            return None
        if verdict["action"] == 0:  # passed
            return []

        hints = []
        for label in verdict.get("labels") or []:
            # "details" / "hint" may be absent; never extend with None.
            hints.extend((label.get("details") or {}).get("hint") or [])

        # NOTE(review): a non-zero action without any hint yields
        # False / [] here, which callers read as "clean" -- confirm this
        # is intended.
        if not detail:
            return bool(hints)
        return hints
import json
from api.views.base_view import BaseView
from api.utils.sensitive import Sensitive
class CreateTopicForBatch(BaseView):
......@@ -19,26 +19,54 @@ class CreateTopicForBatch(BaseView):
if not topic_list:
return self.ok()
topics = []
# 敏感词检测,获取可用的帖子
check_info = Sensitive.check([topic["content"] for topic in topic_list if topic.get("content")])
for topic in topic_list:
succ = check_info.get(topic.get("content")) if topic.get("content") else True
if not succ:
topics.append(topic)
if not topics:
return self.ok()
tag_names = []
for item in topic_list:
tag_names.extend(item.get("tags", []))
for item in topics:
tags = item.get("tags") or []
tag_names.extend([tag.replace("#", '').strip() for tag in tags])
item["user_id"] = user_id
check_info = Sensitive.check(tag_names)
tags = [tag_name for tag_name, succ in check_info.items() if not succ]
# 先创建标签
_tag_error, _tag_data = self.call_rpc(
"venus/community/tag/batch_create_tag_by_name",
tag_names=list(filter(None, set(tag_names)))
tag_names=tags
)
if _tag_error:
return self.error(_tag_error)
# 更新发帖
for item in topic_list:
item["tag_ids"] = [_tag_data.get(tag_name, 0) for tag_name in item.get("tags", [])]
# 处理标签,将文本中的标签处理成现有标签
for item in topics:
tags = item.get("tags") or []
tags = [tag.replace("#", '').strip() for tag in tags]
content = item["content"]
for tag_name, tag_id in _tag_data.items():
if tag_name in tags:
alpha_tag = '<topic>{' + '"id":{},"name":"{}"'.format(tag_id, tag_name) + '}</topic>'
content = content.replace('#' + tag_name, alpha_tag)
item["content"] = content.replace('#', '')
item["tag_ids"] = [
_tag_data[tag_name]
for tag_name in tags if _tag_data.get(tag_name)
]
create_err, result = self.call_rpc(
"venus/community/topic/batch_create_for_inner",
topic_list=topic_list
topic_list=topics
)
if create_err:
return self.error(create_err)
......
"""利用gevent并发网络请求"""
import gevent
from gevent import monkey
# Patch the standard ``socket`` module with gevent's cooperative version as
# soon as this module is imported, so that network requests issued inside
# the spawned greenlets can run concurrently.
# NOTE(review): this is an import-time, process-wide side effect -- confirm
# that every importer of this module expects it.
monkey.patch_socket()
class Task(object):
    """A callable bundled with the arguments it should be called with."""

    def __init__(self, serve, *args, **kwargs):
        """Remember ``serve`` and the positional/keyword arguments for it."""
        self.serve, self.args, self.kwargs = serve, args, kwargs
class SpawnTask(object):
    """Run a batch of ``Task`` objects concurrently as gevent greenlets."""

    # Default number of seconds ``run`` waits for the whole batch.
    TIMEOUT = 5

    def __init__(self, tasks, timeout=None):
        """Store the tasks to run.

        Args:
            tasks: iterable of ``Task`` objects, e.g.::

                tasks = [
                    Task(get_sensitive, text1),
                    Task(get_sensitive, text2),
                    Task(get_sensitive, text3),
                ]

            timeout: seconds to wait for the batch; ``None`` (the
                default) falls back to the class-level ``TIMEOUT``.
        """
        self.tasks = tasks
        self.jobs = []
        self.timeout = timeout

    def run(self):
        """Spawn every task, then wait for the batch to finish.

        The wait is bounded by the timeout, which applies to the batch as
        a whole rather than to each task.
        """
        self._spawn()
        self._joinall()

    @property
    def result(self):
        """Values of the spawned jobs, in task order.

        Empty before ``run`` has been called.  A job that has not
        produced a value contributes whatever its ``value`` attribute
        holds (``None`` per the gevent documentation).
        """
        return [job.value for job in self.jobs]

    def _spawn(self):
        """Start one greenlet per task."""
        self.jobs = [
            gevent.spawn(task.serve, *task.args, **task.kwargs)
            for task in self.tasks
        ]

    def _joinall(self):
        """Wait for the spawned greenlets, up to the effective timeout."""
        timeout = self.TIMEOUT if self.timeout is None else self.timeout
        # NOTE(review): greenlets still running when the timeout expires
        # are left alone, not killed -- confirm that is acceptable.
        gevent.joinall(self.jobs, timeout=timeout)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment