Commit 9a5f5f81 authored by 王浩's avatar 王浩

Merge branch 'haow/pin_crawl' into 'test'

Haow/pin crawl

See merge request !60
parents 84a581ee 7965e62f
......@@ -7,7 +7,7 @@ from api.cache.cache import ins_cache
from libs.user import get_user_by_ids
from alpha_types.venus import ERROR as CODES
from alpha_types.venus import GRAP_PLATFORM
from engine.logger import info_logger
from engine.logger import info_logger, error_logger
topic_id_cache = "ins_account_cache"
......@@ -18,6 +18,99 @@ class CreateTopicForBatch(BaseView):
内部使用,批量建帖
"""
def batch_create_tags(self, tags, is_location=None, is_own=None):
    """Create tags by name through the community tag RPC, best-effort.

    Failures are logged and never raised, so a tag-service problem does
    not abort the surrounding batch.

    Args:
        tags: tag names forwarded to the RPC as ``tags``.
        is_location: forwarded unchanged to the RPC as ``is_location``.
        is_own: forwarded unchanged to the RPC as ``is_own``.

    Returns:
        A ``(error, data)`` tuple. ``error`` is whatever ``call_rpc``
        reported, or the caught exception if the call itself raised.
        ``data`` is the RPC payload on success and ``{}`` on any failure.
    """
    api = 'venus/community/tag/batch_create_tag_by_name'
    info_logger.info({
        'api': api,
        'tags': tags,
        # Log the flags that are really sent instead of a hard-coded 1.
        'is_own': is_own,
        'is_location': is_location,
    })

    # Bind both names up front so the final return can never hit an
    # unbound local when call_rpc raises before assigning them.
    _tag_error, _tag_data = None, {}
    try:
        _tag_error, _tag_data = self.call_rpc(
            api,
            tags=tags, is_own=is_own, is_location=is_location,
        )
    except Exception as e:
        # Deliberate best-effort boundary: record the failure and carry on.
        error_logger.error({'api': api, 'information': e})
        _tag_error, _tag_data = e, {}
    else:
        if _tag_error:
            error_logger.error({'api': api, 'error': _tag_error})
            _tag_data = {}

    info_logger.info({
        'api': api,
        '_tag_data': _tag_data,
    })
    return _tag_error, _tag_data
def replace_tag_info(self, topic_list, tag_data):
    """Rewrite hashtags in each topic's content into ``<topic>`` markup.

    For every topic, the tag names are taken from ``item["tags"]`` (with
    ``#`` and surrounding whitespace removed) plus the location name when
    ``item["location"]["name"]`` is set. Each ``#name`` in the content whose
    name is both in that list and in ``tag_data`` is replaced by
    ``<topic>{"id":<id>,"name":"<name>"}</topic>``; any remaining ``#`` is
    stripped. The topics are modified in place.

    Args:
        topic_list: list of topic dicts; ``content``, ``tags`` and
            ``location`` are read, ``content`` and ``tag_ids`` are written.
        tag_data: mapping of tag name -> tag id.

    Returns:
        The same ``topic_list`` object, after mutation.
    """
    # Replace longer names first: otherwise "#fashion" would be rewritten
    # inside "#fashionweek" and the longer tag could never match.
    ordered_tags = sorted(
        tag_data.items(),
        key=lambda pair: len(str(pair[0])),
        reverse=True,
    )

    for item in topic_list:
        tags = [tag.replace("#", '').strip() for tag in item.get("tags") or []]

        # Add the location tag.
        location = item.get("location")
        if location and location.get("name"):
            tags.append(location.get("name"))

        # Topics kept only for their images/video may carry no text at all;
        # treat missing/None content as empty instead of crashing.
        content = item.get("content") or ""
        for tag_name, tag_id in ordered_tags:
            if tag_name in tags:
                alpha_tag = '<topic>{' + '"id":{},"name":"{}"'.format(tag_id, tag_name) + '}</topic>'
                content = content.replace('#' + tag_name, alpha_tag)

        item["content"] = content.replace('#', '')
        item["tag_ids"] = [
            tag_data[tag_name]
            for tag_name in tags if tag_data.get(tag_name)
        ]

    return topic_list
def check_exist_ids(self, topics, platform):
    """Filter out topics whose ids are already recorded in the cache set.

    De-duplication happens at the cache level (the original note says
    Redis): a topic is kept only when its ``id`` is not a member of the
    platform's set in ``ins_cache``. Kept topics are stamped in place with
    ``platform`` and ``platform_id``. Topics without an ``id`` are dropped.

    Args:
        topics: iterable of topic dicts; ``id`` is read from each.
        platform: one of ``GRAP_PLATFORM.INS`` / ``PIN`` / ``FASHION``.

    Returns:
        ``(not_exists_ids, topic_list, cache_key)``. For an unsupported
        platform this is ``([], [], None)`` so callers that unpack three
        values keep working instead of failing on a bare ``None``.
    """
    if platform == GRAP_PLATFORM.INS:
        cache_key = topic_id_cache
    elif platform == GRAP_PLATFORM.PIN:
        cache_key = topic_id_cache + ":2"
    elif platform == GRAP_PLATFORM.FASHION:
        cache_key = topic_id_cache + ":3"
    else:
        # Unknown platform: nothing can be de-duplicated or created.
        return [], [], None

    not_exists_ids = []
    topic_list = []
    for item in topics:
        _id = item.get("id")
        if not _id:
            continue
        if ins_cache.sismember(cache_key, _id):
            continue
        item["platform"] = platform
        item["platform_id"] = _id
        topic_list.append(item)
        not_exists_ids.append(_id)

    return not_exists_ids, topic_list, cache_key
def check_sensitive_content(self, topic_list):
    """Run sensitive-word detection and return the topics that are usable.

    All non-empty ``content`` strings are sent to ``Sensitive.check`` in one
    call. A topic with content is kept when the check result for that
    content is falsy (or absent); a topic without content is kept only if
    it has ``images`` or ``video``. Input order is preserved.
    """
    texts = [entry["content"] for entry in topic_list if entry.get("content")]
    verdicts = Sensitive.check(texts)

    def _is_usable(entry):
        # Text topics depend on the check result; media-only topics pass
        # as long as they actually carry media.
        text = entry.get('content')
        if text:
            return not verdicts.get(text)
        return bool(entry.get('images') or entry.get('video'))

    return [entry for entry in topic_list if _is_usable(entry)]
def post(self, request):
user_id = request.POST.get("user_id", 0)
......@@ -48,18 +141,12 @@ class CreateTopicForBatch(BaseView):
if not topic_list:
return self.ok()
topics = []
# 敏感词检测,获取可用的帖子
# check_info = Sensitive.check([topic["content"] for topic in topic_list if topic.get("content")])
# for topic in topic_list:
# if topic.get('content'):
# succ = check_info.get(topic.get("content"))
# if not succ:
# topics.append(topic)
# else:
# if topic.get('images') or topic.get('video'):
# topics.append(topic)
for topic in topic_list:
# checked_topics = self.check_sensitive_content(topic_list)
checked_topics = topic_list
topics = []
for topic in checked_topics:
if topic.get('content'):
topics.append(topic)
else:
......@@ -80,82 +167,25 @@ class CreateTopicForBatch(BaseView):
item["is_online"] = is_online
item["pictorial_tag_ids"] = pictorial_tag_ids
if item.get("location") and item.get("location").get("name"):
_tag_error, _location_tag_data = self.call_rpc(
"venus/community/tag/batch_create_tag_by_name",
tags=tags, is_location=1, is_own=1,
)
if _tag_error:
return self.error(_tag_error)
_tag_error, _location_tag_data = self.batch_create_tags(tags=tags, is_location=1, is_own=1)
location_tags.append(_location_tag_data)
not_exists_ids = []
topic_list = []
if platform == GRAP_PLATFORM.INS:
cache_key = topic_id_cache
elif platform == GRAP_PLATFORM.PIN:
cache_key = topic_id_cache + ":2"
elif platform == GRAP_PLATFORM.FASHION:
cache_key = topic_id_cache + ":3"
else:
return
if platform in (GRAP_PLATFORM.INS, GRAP_PLATFORM.PIN, GRAP_PLATFORM.FASHION):
for item in topics:
_id = item.get("id")
if not _id:
continue
exists = ins_cache.sismember(cache_key, _id)
if exists:
continue
item["platform"] = platform
item["platform_id"] = _id
topic_list.append(item)
not_exists_ids.append(_id)
not_exists_ids, topic_list, cache_key = self.check_exist_ids(topics=topics, platform=platform)
# check_info = Sensitive.check(tag_names)
# tags = [tag_name for tag_name, succ in check_info.items() if not succ]
tags = tag_names
# 先创建标签
info_logger.info({
'api': 'venus/community/tag/batch_create_tag_by_name',
'tags': tags,
'is_own': 1,
})
_tag_error, _tag_data = self.call_rpc(
"venus/community/tag/batch_create_tag_by_name",
tags=tags, is_own=1
)
if _tag_error:
return self.error(_tag_error)
info_logger.info({
'api': 'venus/community/tag/batch_create_tag_by_name',
'_tag_data': _tag_data,
})
#_tag_data.update(location_tags)
_tag_error, _tag_data = self.batch_create_tags(tags=tags, is_own=1)
# 地域标签
if location_tags:
_tag_data = _tag_data.extend(location_tags)
# 更新发帖
# 处理标签,将文本中的标签处理成现有标签
for item in topic_list:
tags = item.get("tags") or []
tags = [tag.replace("#", '').strip() for tag in tags]
#添加地域标签
if item.get("location") and item.get("location").get("name"):
tags.append(item.get("location").get("name"))
content = item.get("content")
for tag_name, tag_id in _tag_data.items():
if tag_name in tags:
alpha_tag = '<topic>{' + '"id":{},"name":"{}"'.format(tag_id, tag_name) + '}</topic>'
content = content.replace('#' + tag_name, alpha_tag)
item["content"] = content.replace('#', '')
item["tag_ids"] = [
_tag_data[tag_name]
for tag_name in tags if _tag_data.get(tag_name)
]
topic_list = self.replace_tag_info(topic_list=topic_list, tag_data=_tag_data)
info_logger.info({
'api': 'venus/community/topic/batch_create_for_inner',
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment