Commit 54417f54 authored by 王浩

Merge branch 'test' into 'master'

crawl reply insert

See merge request !97
parents 33f81468 6503941f
...@@ -6,6 +6,7 @@ from .views import topic ...@@ -6,6 +6,7 @@ from .views import topic
from .views import tag from .views import tag
from .views import reply from .views import reply
from .views import product from .views import product
from .views import pictorial
urlpatterns = [ urlpatterns = [
...@@ -32,4 +33,7 @@ urlpatterns = [ ...@@ -32,4 +33,7 @@ urlpatterns = [
# product # product
url(r'^v1/product/batch_create$', product.ProductBatchCreate.as_view(), name='product_batch_create'), url(r'^v1/product/batch_create$', product.ProductBatchCreate.as_view(), name='product_batch_create'),
url(r'^v1/brand/batch_create$', product.BrandBatchCreate.as_view(), name='brand_batch_create'), url(r'^v1/brand/batch_create$', product.BrandBatchCreate.as_view(), name='brand_batch_create'),
# pictorial
url(r'^v1/pictorial/create$', pictorial.CreatePictorial.as_view(), name='pictorial_create'),
] ]
import requests
from gm_upload import upload, upload_file
from gm_upload import IMG_TYPE
def upload_image(url, img_type=IMG_TYPE.TOPIC, timeout=10):
    """Download an off-site image and re-upload it through ``gm_upload``.

    Args:
        url: Address of the remote image to fetch.
        img_type: ``IMG_TYPE`` value forwarded to ``upload``.
        timeout: Seconds ``requests`` waits on the remote host before
            giving up, so a stalled host cannot block the caller forever.

    Returns:
        Whatever ``upload`` returns on success, or ``None`` when the
        download or the upload fails for any reason.
    """
    try:
        response = requests.get(url, timeout=timeout)
        # Do not store an HTTP error page (404/500 body) as if it were an image.
        response.raise_for_status()
        return upload(response.content, img_type=img_type)
    except Exception:
        # Best effort: callers treat None as "failed, maybe try again".
        # A bare ``except:`` would also swallow KeyboardInterrupt/SystemExit.
        return None
import json
import requests
from random import randint
from api.views.base_view import BaseView
from api.utils.sensitive import Sensitive
from api.utils.upload import upload_image
from api.cache.cache import ins_cache
from libs.user import get_user_by_ids
from alpha_types.venus import ERROR
from alpha_types.venus import GRAP_PLATFORM
from engine.logger import info_logger, error_logger, logging_exception
pictorial_id_cache = "pictorial_cache"
IMAGE_SUFFIX = '-w'
class CreatePictorial(BaseView):
    """
    Crawl endpoint that inserts crawled topics or pictorials (with their
    images and comments) through the ``venus/community/crawl/*`` RPCs.

    Every crawled author is mapped to a randomly picked shadow user. The
    mapping lives in ``ins_cache`` only for the duration of one request and
    is removed again by ``del_cache``.
    """

    user_id_start = 241757306  # end 241806255
    # Class-level default only. Each instance gets its own list in __init__,
    # so keys recorded by one request are never shared with another one.
    del_cache_keys = []
    # Upper bound for retrying image upload / image-size lookup / failed RPCs.
    max_retries = 10
    # Seconds to wait for the image-info HTTP request.
    request_timeout = 10

    def __init__(self, *args, **kwargs):
        super(CreatePictorial, self).__init__(*args, **kwargs)
        self.del_cache_keys = []

    def del_cache(self):
        """Delete every cache key recorded during this request, then forget them."""
        keys = list(self.del_cache_keys)
        # Clear in place so the same keys are not deleted again later and the
        # list cannot grow without bound.
        del self.del_cache_keys[:]
        for key in keys:
            ins_cache.delete(key)

    def get_random_user_id(self):
        """Return a random user id that the ``is_shadow`` RPC confirms.

        Candidates are drawn from ``user_id_start`` .. ``user_id_start + 5000``
        and re-drawn until the RPC answers with a ``user_id``.

        Raises:
            RuntimeError: the RPC returned no payload ``max_retries`` times.
        """
        rpc_failures = 0
        while True:
            user_id = self.user_id_start + randint(0, 5000)
            error, data = self.call_rpc('venus/community/user/is_shadow', user_id=user_id)
            if data is None:
                # No payload at all means the RPC failed; do not spin forever.
                rpc_failures += 1
                if rpc_failures >= self.max_retries:
                    raise RuntimeError('is_shadow rpc failed: {}'.format(error))
                continue
            if data.get('user_id'):
                return user_id

    def get_user_id(self, id_, platform):
        """Map a crawled id to a shadow user id, reusing a cached mapping.

        Two cache entries are written for a new mapping, both recorded for
        deletion in ``del_cache``:
          * ``grap:<platform>:<id_>``      -> chosen shadow user id
          * ``grap:<platform>:<user_id>``  -> ``id_`` (marks the user as taken)

        NOTE(review): both entries share one key format, so a crawled id that
        equals a shadow user id would collide -- confirm this cannot happen.
        """
        cache_key = 'grap:{}:{}'.format(platform, id_)
        value = ins_cache.get(cache_key)
        if value:
            return int(value)

        while True:
            user_id = self.get_random_user_id()
            exist = 'grap:{}:{}'.format(platform, user_id)
            if not ins_cache.get(exist):
                ins_cache.set(exist, id_)
                self.del_cache_keys.append(exist)
                break
        ins_cache.set(cache_key, user_id)
        # Record the mapping key itself; the original recorded ``exist`` twice
        # and therefore never cleaned this key up.
        self.del_cache_keys.append(cache_key)
        return user_id

    def get_image_size(self, image_url):
        """Fetch the width and height of an uploaded image.

        Requests ``<image_url>-w?imageInfo`` and reads ``width`` / ``height``
        from the JSON answer.

        Returns:
            ``(width, height)`` on success, ``None`` if the request or the
            JSON decoding failed (the exception is logged).
        """
        try:
            url = image_url + IMAGE_SUFFIX + '?imageInfo'
            response = requests.request("GET", url, timeout=self.request_timeout)
            info = response.json()
            return info.get('width'), info.get('height')
        except Exception:
            logging_exception()
            return None

    def _upload_with_retry(self, url):
        """Upload one remote image, retrying up to ``max_retries`` times."""
        for _ in range(self.max_retries):
            image_url = upload_image(url)
            if image_url:
                return image_url
        raise RuntimeError('failed to upload image: {}'.format(url))

    def _image_size_with_retry(self, image_url):
        """Return ``(width, height)``, retrying up to ``max_retries`` times."""
        for _ in range(self.max_retries):
            size = self.get_image_size(image_url)
            if not size:
                # get_image_size signals failure with a bare None.
                continue
            width, height = size
            if width or height:
                return width, height
        raise RuntimeError('failed to get image size: {}'.format(image_url))

    def image_info(self, urls):
        """Upload every image in ``urls`` and describe the uploaded copies.

        Returns:
            A list of ``{'url', 'height', 'width'}`` dicts, where ``url`` has
            the ``http://alpha.gmeiapp.com/`` prefix stripped.

        Raises:
            RuntimeError: an image could not be uploaded or measured within
                ``max_retries`` attempts.
        """
        ret = []
        for url in urls:
            image_url = self._upload_with_retry(url)
            width, height = self._image_size_with_retry(image_url)
            ret.append(
                {
                    'url': image_url.replace('http://alpha.gmeiapp.com/', ''),
                    'height': height,
                    'width': width,
                }
            )
        return ret

    def revise_comments(self, comment, from_id):
        """Normalise a crawled comment and flatten its replies.

        The comment dict is modified in place: ``from_id`` and ``content``
        are set and ``reply`` is removed. Every reply inherits ``from_id``,
        ``type`` and gets ``reply_id`` set to the comment's ``id``.

        Returns:
            ``(comment, replies)`` where ``replies`` is a (possibly empty) list.
        """
        comment['from_id'] = from_id
        comment['content'] = comment.get('comment')
        replies = comment.pop('reply', None)
        if not replies:
            return comment, []

        ret = []
        for info in replies:
            info['from_id'] = comment.get('from_id')
            info['reply_id'] = comment.get('id')
            info['type'] = comment.get('type')
            ret.append(info)
        return comment, ret

    def create_comment(self, comment_list, from_id, platform, topic_id=None, pictorial_id=None):
        """Insert one top-level comment and then its replies.

        Args:
            comment_list: A single crawled comment dict (it may carry a
                ``reply`` list), despite the plural name.
            from_id: Crawled id of the topic/pictorial the comment belongs to.
            platform: Source platform value forwarded to the RPC.
            topic_id: Target topic id, if commenting on a topic.
            pictorial_id: Target pictorial id, if commenting on a pictorial.

        Returns:
            ``(error, data)``; ``(None, None)`` on success.
        """
        top_comment, comments = self.revise_comments(comment_list, from_id)
        top_comment['user_id'] = self.get_user_id(id_=top_comment.get('user').get('id'), platform=platform)
        top_comment.pop('user')
        error, ret = self.call_rpc(
            'venus/community/crawl/replys',
            data=[top_comment], platform=platform, topic_id=topic_id, pictorial_id=pictorial_id,
        )
        if error:
            return error, ret

        # Replies are attached to the freshly created top-level reply.
        top_id = ret.get('reply_ids')[0]
        for obj in comments:
            obj['user_id'] = self.get_user_id(id_=obj.get('user').get('id'), platform=platform)
            obj.pop('user')
        error, _ = self.call_rpc(
            'venus/community/crawl/replys',
            data=comments, platform=platform, topic_id=topic_id, pictorial_id=pictorial_id, top_id=top_id,
        )
        if error:
            return error, None
        return None, None

    def create_topic(self, topics, platform):
        """Insert crawled topics and, for XIAOHONGSHU, their comments.

        Each topic dict is modified in place (``comments`` / ``image`` are
        removed, ``images`` / ``user_id`` are added) before it is sent.

        Returns:
            ``(error, data)``; ``(None, None)`` on success. Processing stops
            at the first RPC error.
        """
        for topic in topics:
            topic_comments = topic.pop('comments', None)
            images = topic.pop('image')
            topic['images'] = self.image_info(images)
            topic['user_id'] = self.get_user_id(id_=topic.get('id'), platform=platform)
            error, topic_obj = self.call_rpc(
                'venus/community/crawl/topic', data=topic, platform=platform, pictorial_id=None,
            )
            if error:
                # The original returned an unbound ``_`` here (NameError).
                return error, None
            if not topic_comments:
                continue

            from_id = topic.get('id')
            if platform == GRAP_PLATFORM.XIAOHONGSHU:
                for topic_comment in topic_comments:
                    error, data = self.create_comment(
                        comment_list=topic_comment, from_id=from_id,
                        platform=platform, topic_id=topic_obj.get('id'),
                    )
                    if error:
                        return error, data
        return None, None

    def create_pictorial(self, pictorial, platform):
        """Insert one crawled pictorial, one topic per image, and its comments.

        The pictorial dict is modified in place (``comments`` / ``image`` are
        removed; ``user_id``, ``description`` and ``name`` are added).

        Returns:
            ``(error, data)``; ``(None, None)`` on success or when
            ``pictorial`` is empty.
        """
        if not pictorial:
            return None, None

        pictorial_comments = pictorial.pop('comments', None)
        images = self.image_info(pictorial.pop('image'))

        # Every image becomes its own topic inside the pictorial.
        topics = []
        for index, obj in enumerate(images, 1):
            topics.append({
                # format() also works when the crawled id is not a string.
                'id': '{}{}'.format(pictorial.get('id'), index),
                'content': pictorial.get('content'),
                'images': [obj],
                'create_time': pictorial.get('create_time'),
                'user_id': self.get_user_id(id_=obj.get('url'), platform=platform),
            })

        pictorial['user_id'] = self.get_user_id(id_=pictorial.get('id'), platform=platform)
        pictorial['description'] = pictorial.get('content')
        # The pictorial name is the first 20 characters of the crawled content.
        # Slicing already copes with shorter strings; the previous manual
        # length adjustment cut off their last character.
        pictorial['name'] = (pictorial.get('content') or '')[:20]

        error, pictorial_obj = self.call_rpc('venus/community/crawl/pictorial', data=pictorial, platform=platform)
        if error:
            return error, None
        pictorial_id = pictorial_obj.get('id')

        for obj in topics:
            error, _ = self.call_rpc(
                'venus/community/crawl/topic', data=obj, platform=platform, pictorial_id=pictorial_id,
            )
            if error:
                return error, None

        if pictorial_comments and platform == GRAP_PLATFORM.XIAOHONGSHU:
            for pictorial_comment in pictorial_comments:
                error, data = self.create_comment(
                    comment_list=pictorial_comment, from_id=pictorial.get('id'),
                    platform=platform, pictorial_id=pictorial_id,
                )
                if error:
                    return error, data
        return None, None

    def _params_incomplete(self):
        """Build the standard "parameters incomplete" error response."""
        return self.error(self.get_ErrorInfo(ERROR.PARAMS_INCOMPLETE))

    def post(self, request):
        """Handle a crawl-insert request.

        POST fields:
            topics: JSON list of crawled items; each needs ``id``,
                ``content``, ``image``, ``create_time`` and ``user.id``.
            platform: Integer platform value (defaults to XIAOHONGSHU).
            is_pictorial: Integer flag; truthy inserts every item as a
                pictorial, otherwise as plain topics.
        """
        try:
            topics = json.loads(request.POST.get('topics', '[]'))
            platform = int(request.POST.get('platform', GRAP_PLATFORM.XIAOHONGSHU))
            is_pictorial = int(request.POST.get('is_pictorial', None) or 0)
        except (TypeError, ValueError):
            # Malformed JSON or non-numeric flags are a client error, not a crash.
            return self._params_incomplete()

        if not topics or not isinstance(topics, list):
            return self._params_incomplete()
        for topic in topics:
            if not isinstance(topic, dict):
                return self._params_incomplete()
            for field in ('id', 'content', 'image', 'create_time'):
                if not topic.get(field):
                    return self._params_incomplete()
            # A missing "user" object must not raise AttributeError.
            if not (topic.get('user') or {}).get('id'):
                return self._params_incomplete()

        try:
            if is_pictorial:
                for obj in topics:
                    error, _ = self.create_pictorial(pictorial=obj, platform=platform)
                    if error:
                        return self.error(error=error)
            else:
                error, _ = self.create_topic(topics=topics, platform=platform)
                if error:
                    return self.error(error=error)
            return self.ok()
        finally:
            # Always drop the temporary id mappings, even if an exception escapes.
            self.del_cache()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment