Commit 54417f54 authored by 王浩

Merge branch 'test' into 'master'

crawl reply insert

See merge request !97
parents 33f81468 6503941f
......@@ -6,6 +6,7 @@ from .views import topic
from .views import tag
from .views import reply
from .views import product
from .views import pictorial
urlpatterns = [
......@@ -32,4 +33,7 @@ urlpatterns = [
# product
url(r'^v1/product/batch_create$', product.ProductBatchCreate.as_view(), name='product_batch_create'),
url(r'^v1/brand/batch_create$', product.BrandBatchCreate.as_view(), name='brand_batch_create'),
# pictorial
url(r'^v1/pictorial/create$', pictorial.CreatePictorial.as_view(), name='pictorial_create'),
]
import requests
from gm_upload import upload, upload_file
from gm_upload import IMG_TYPE
def upload_image(url, img_type=IMG_TYPE.TOPIC, timeout=10):
    """Download an off-site image and re-upload it through ``gm_upload``.

    Args:
        url: Address of the remote image to fetch.
        img_type: ``IMG_TYPE`` member forwarded to ``upload``.
        timeout: Seconds to wait for the remote server before giving up, so a
            stalled host cannot block the calling worker indefinitely.

    Returns:
        Whatever ``upload`` returns for the downloaded bytes, or ``None`` when
        the download or the upload fails.
    """
    try:
        response = requests.get(url, timeout=timeout)
        # Do not store the body of a 4xx/5xx error page as if it were an image.
        response.raise_for_status()
        return upload(response.content, img_type=img_type)
    except Exception:
        # Deliberately best-effort: callers treat None as "try again".
        # ``Exception`` (rather than a bare ``except``) lets KeyboardInterrupt
        # and SystemExit propagate, so a retrying caller can still be stopped.
        return None
import json
import requests
from random import randint
from api.views.base_view import BaseView
from api.utils.sensitive import Sensitive
from api.utils.upload import upload_image
from api.cache.cache import ins_cache
from libs.user import get_user_by_ids
from alpha_types.venus import ERROR
from alpha_types.venus import GRAP_PLATFORM
from engine.logger import info_logger, error_logger, logging_exception
# Presumably a cache key for pictorial ids; it is not referenced by any code
# visible in this module -- TODO confirm whether it is still needed.
pictorial_id_cache = "pictorial_cache"
# Appended to an uploaded image URL, ahead of the '?imageInfo' query, when
# CreatePictorial.get_image_size asks for the image dimensions.
IMAGE_SUFFIX = '-w'
class CreatePictorial(BaseView):
    """Import endpoint for crawled topics / pictorials and their comments.

    ``post`` validates the crawled payload and forwards it to the
    ``venus/community/crawl/*`` RPCs.  Every crawled author id is mapped to a
    randomly chosen local user id (see ``get_user_id``); the temporary
    "user already taken" cache markers created on the way are removed again
    when the request finishes.
    """

    user_id_start = 241757306  # end 241806255
    # Class-level default only.  It is never mutated: the first tracked key
    # replaces it with a per-instance list (see ``_track_cache_key``), so keys
    # no longer leak between requests.
    del_cache_keys = []
    # Upper bound for picking a free local user before giving up.
    max_user_lookups = 100
    # Upper bound for re-trying an image upload / image-size lookup.
    max_retries = 5
    # Seconds to wait for the image-info HTTP call.
    request_timeout = 10

    def _track_cache_key(self, key):
        """Remember ``key`` so that ``del_cache`` deletes it later."""
        if self.del_cache_keys is type(self).del_cache_keys:
            # Still looking at the shared class attribute: switch to a list
            # owned by this instance before the first append.
            self.del_cache_keys = []
        self.del_cache_keys.append(key)

    def del_cache(self):
        """Delete every tracked cache key and forget the tracked keys."""
        keys = self.del_cache_keys
        self.del_cache_keys = []
        for key in keys:
            ins_cache.delete(key)

    def get_random_user_id(self):
        """Pick a random id from the reserved range that the RPC confirms.

        Returns:
            A user id for which ``venus/community/user/is_shadow`` answered
            with a truthy ``user_id``.

        Raises:
            RuntimeError: no candidate was confirmed within
                ``max_user_lookups`` attempts (previously this looped forever,
                or crashed on an empty RPC answer).
        """
        for _ in range(self.max_user_lookups):
            candidate = self.user_id_start + randint(0, 5000)
            error, data = self.call_rpc('venus/community/user/is_shadow', user_id=candidate)
            if error or not data:
                # RPC failed or returned nothing usable; try another candidate.
                continue
            if data.get('user_id'):
                return candidate
        raise RuntimeError('no shadow user found after {} attempts'.format(self.max_user_lookups))

    def get_user_id(self, id_, platform):
        """Return the local user id mapped to crawled id ``id_``.

        A mapping already stored under ``grap:<platform>:<id_>`` is reused.
        Otherwise a random local user that is not yet marked as taken for this
        platform is claimed, the claim marker is tracked for cleanup, and the
        new mapping is stored.

        Raises:
            RuntimeError: no unclaimed user was found within
                ``max_user_lookups`` attempts.
        """
        cache_key = 'grap:{}:{}'.format(platform, id_)
        cached = ins_cache.get(cache_key)
        if cached:
            return int(cached)

        for _ in range(self.max_user_lookups):
            user_id = self.get_random_user_id()
            # NOTE(review): the claim marker shares the 'grap:<platform>:' key
            # space with the mapping key above -- confirm crawled ids can never
            # collide with local user ids.
            exist = 'grap:{}:{}'.format(platform, user_id)
            if not ins_cache.get(exist):
                ins_cache.set(exist, id_)
                self._track_cache_key(exist)
                break
        else:
            raise RuntimeError('no unclaimed shadow user for platform {}'.format(platform))

        ins_cache.set(cache_key, user_id)
        # NOTE(review): the original tracked ``exist`` a second time here.  The
        # duplicate was dropped (deleting twice equals deleting once); if
        # ``cache_key`` was meant to be cleaned up too, track it explicitly.
        return user_id

    def get_image_size(self, image_url):
        """Fetch the width and height of an uploaded image.

        Returns:
            ``(width, height)`` as reported by the ``?imageInfo`` endpoint, or
            ``(None, None)`` when the lookup fails.  A pair is returned in both
            cases so callers can always unpack the result.
        """
        try:
            url = image_url + IMAGE_SUFFIX + '?imageInfo'
            response = requests.request("GET", url, timeout=self.request_timeout)
            info = response.json()
            return info.get('width'), info.get('height')
        except Exception:
            logging_exception()
            return None, None

    def image_info(self, urls):
        """Upload every image in ``urls`` and collect url / size records.

        Uploads and size lookups are retried at most ``max_retries`` times.
        An image that still fails is logged and skipped instead of blocking
        the request forever.

        Returns:
            A list of ``{'url', 'height', 'width'}`` dicts, one per image that
            could be processed.
        """
        records = []
        for url in urls:
            image_url = None
            for _ in range(self.max_retries):
                image_url = upload_image(url)
                if image_url:
                    break
            if not image_url:
                error_logger.error('image upload failed, skipping: %s', url)
                continue

            width = height = None
            for _ in range(self.max_retries):
                width, height = self.get_image_size(image_url)
                if width or height:
                    break
            if not width and not height:
                error_logger.error('image size lookup failed, skipping: %s', image_url)
                continue

            records.append(
                {
                    'url': image_url.replace('http://alpha.gmeiapp.com/', ''),
                    'height': height,
                    'width': width,
                }
            )
        return records

    def revise_comments(self, comment, from_id):
        """Split a crawled comment into the top comment and its replies.

        ``comment`` is modified in place: ``from_id`` and ``content`` are set
        and the ``reply`` list is removed.  Each reply receives the parent's
        ``from_id``, ``type`` and the parent's ``id`` as ``reply_id``.

        Returns:
            ``(comment, replies)``; ``replies`` is empty when there are none.
        """
        comment['from_id'] = from_id
        comment['content'] = comment.get('comment')
        replies = comment.pop('reply', None) or []
        for info in replies:
            info['from_id'] = comment.get('from_id')
            info['reply_id'] = comment.get('id')
            info['type'] = comment.get('type')
        return comment, list(replies)

    def create_comment(self, comment_list, from_id, platform, topic_id=None, pictorial_id=None):
        """Store one crawled comment and its replies via the replys RPC.

        Every comment is expected to carry a ``user`` dict with an ``id``; that
        dict is replaced by a mapped ``user_id`` before the RPC call.

        Returns:
            ``(error, result)`` -- ``(None, None)`` on success.
        """
        top_comment, replies = self.revise_comments(comment_list, from_id)
        top_comment['user_id'] = self.get_user_id(id_=top_comment.get('user').get('id'), platform=platform)
        top_comment.pop('user')
        error, ret = self.call_rpc('venus/community/crawl/replys', data=[top_comment], platform=platform, topic_id=topic_id, pictorial_id=pictorial_id)
        if error:
            return error, ret
        if not replies:
            # Nothing to attach below the top comment; skip the empty RPC.
            return None, None

        top_id = ret.get('reply_ids')[0]
        for reply in replies:
            reply['user_id'] = self.get_user_id(id_=reply.get('user').get('id'), platform=platform)
            reply.pop('user')
        error, _ = self.call_rpc('venus/community/crawl/replys', data=replies, platform=platform, topic_id=topic_id, pictorial_id=pictorial_id, top_id=top_id)
        if error:
            return error, None
        return None, None

    def create_topic(self, topics, platform):
        """Store crawled topics (and, for XIAOHONGSHU, their comments).

        Each topic dict is modified in place: ``comments`` and ``image`` are
        removed, ``images`` and ``user_id`` are added.

        Returns:
            ``(error, result)`` -- ``(None, None)`` on success.  Processing
            stops at the first RPC error.
        """
        for topic in topics:
            topic_comments = topic.pop('comments', None)
            topic['images'] = self.image_info(topic.pop('image'))
            # NOTE(review): the local user is keyed by the topic id, not by the
            # crawled author id -- confirm this is intended.
            topic['user_id'] = self.get_user_id(id_=topic.get('id'), platform=platform)
            error, topic_obj = self.call_rpc('venus/community/crawl/topic', data=topic, platform=platform, pictorial_id=None)
            if error:
                # The original returned an unbound ``_`` here (NameError).
                return error, None
            if not topic_comments or platform != GRAP_PLATFORM.XIAOHONGSHU:
                continue
            from_id = topic.get('id')
            for topic_comment in topic_comments:
                error, result = self.create_comment(comment_list=topic_comment, from_id=from_id, platform=platform, topic_id=topic_obj.get('id'))
                if error:
                    return error, result
        return None, None

    def create_pictorial(self, pictorial, platform):
        """Store one crawled pictorial, one topic per image, and its comments.

        ``pictorial`` is modified in place (``comments`` / ``image`` removed,
        ``user_id`` / ``description`` / ``name`` added).

        Returns:
            ``(error, result)`` -- ``(None, None)`` on success or when
            ``pictorial`` is empty.
        """
        if not pictorial:
            return None, None

        pictorial_comments = pictorial.pop('comments', None)
        images = self.image_info(pictorial.pop('image'))
        topics = []
        for index, image in enumerate(images, 1):
            topics.append({
                # str() keeps numeric crawled ids from breaking the concatenation.
                'id': str(pictorial.get('id')) + str(index),
                'content': pictorial.get('content'),
                'images': [image],
                'create_time': pictorial.get('create_time'),
                'user_id': self.get_user_id(id_=image.get('url'), platform=platform)
            })

        pictorial['user_id'] = self.get_user_id(id_=pictorial.get('id'), platform=platform)
        pictorial['description'] = pictorial.get('content')
        # The pictorial name is the first 20 characters of the crawled content.
        # Slicing already copes with shorter content; the former manual bound
        # cut off the last character of short texts.
        pictorial['name'] = pictorial.get('content')[:20]

        error, pictorial_obj = self.call_rpc('venus/community/crawl/pictorial', data=pictorial, platform=platform)
        if error:
            return error, None
        pictorial_id = pictorial_obj.get('id')

        for topic in topics:
            error, _ = self.call_rpc('venus/community/crawl/topic', data=topic, platform=platform, pictorial_id=pictorial_id)
            if error:
                return error, None

        if pictorial_comments and platform == GRAP_PLATFORM.XIAOHONGSHU:
            for pictorial_comment in pictorial_comments:
                error, result = self.create_comment(comment_list=pictorial_comment, from_id=pictorial.get('id'), platform=platform, pictorial_id=pictorial_id)
                if error:
                    return error, result
        return None, None

    def post(self, request):
        """Validate the crawled payload and import it.

        POST fields read: ``topics`` (JSON list), ``platform`` (int, defaults
        to XIAOHONGSHU) and ``is_pictorial`` (int flag).  Every topic needs
        ``id``, ``content``, ``image``, ``create_time`` and ``user.id``.
        Malformed or incomplete input yields ``PARAMS_INCOMPLETE``.
        """
        incomplete = ERROR.PARAMS_INCOMPLETE
        try:
            topics = json.loads(request.POST.get('topics', '[]'))
            platform = int(request.POST.get('platform', GRAP_PLATFORM.XIAOHONGSHU))
            is_pictorial = int(request.POST.get('is_pictorial', None) or 0)
        except (TypeError, ValueError):
            # Unparsable JSON / numbers used to escape as an unhandled error.
            return self.error(self.get_ErrorInfo(incomplete))

        if not topics or not isinstance(topics, list):
            return self.error(self.get_ErrorInfo(incomplete))

        required = ('id', 'content', 'image', 'create_time')
        for topic in topics:
            if not isinstance(topic, dict):
                return self.error(self.get_ErrorInfo(incomplete))
            if not all(topic.get(field) for field in required):
                return self.error(self.get_ErrorInfo(incomplete))
            user = topic.get('user')
            if not isinstance(user, dict) or not user.get('id'):
                return self.error(self.get_ErrorInfo(incomplete))

        try:
            if is_pictorial:
                for obj in topics:
                    error, _ = self.create_pictorial(pictorial=obj, platform=platform)
                    if error:
                        return self.error(error=error)
            else:
                error, _ = self.create_topic(topics=topics, platform=platform)
                if error:
                    return self.error(error=error)
        finally:
            # Runs on success, on an error response and on an exception, so the
            # temporary claim markers never outlive the request.
            self.del_cache()
        return self.ok()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment