import json
import requests
from random import randint

from api.views.base_view import BaseView
from api.utils.sensitive import Sensitive
from api.utils.upload import upload_image
from api.cache.cache import ins_cache
from libs.user import get_user_by_ids
from alpha_types.venus import ERROR
from alpha_types.venus import GRAP_PLATFORM
from engine.logger import info_logger, error_logger, logging_exception

pictorial_id_cache = "pictorial_cache"
# Suffix appended to an uploaded image URL before the '?imageInfo' query.
IMAGE_SUFFIX = '-w'


class CreatePictorial(BaseView):
    """
    Endpoint that ingests crawled ("grabbed") pictorials / topics.

    The POST handler receives a JSON list of crawled items and forwards them,
    together with their images and comments, to the ``venus/community/crawl/*``
    RPCs. Every crawled author is mapped onto a randomly picked local user id;
    the mapping is kept in ``ins_cache``.
    """
    # Base of the user-id range random users are drawn from.
    user_id_start = 241757306  # end 241806255
    # Cache keys to delete once the request is finished. ``post`` rebinds this
    # to a fresh per-request list; the class-level list is only a fallback for
    # callers that use the helpers without going through ``post``.
    del_cache_keys = []

    def del_cache(self):
        """Delete every cache key registered in ``del_cache_keys``, then empty the list."""
        keys = self.del_cache_keys
        for key in keys:
            ins_cache.delete(key)
        # Forget the processed keys so they are not deleted again later and the
        # list does not grow without bound.
        del keys[:]

    def get_random_user_id(self):
        """
        Pick a random user id in ``[user_id_start, user_id_start + 5000]``.

        Candidates are checked through the ``venus/community/user/is_shadow``
        RPC and the first one whose response carries a truthy ``user_id`` is
        returned.

        NOTE(review): the loop is unbounded - it never terminates if no
        candidate is ever accepted.
        """
        while True:
            candidate = self.user_id_start + randint(0, 5000)
            error, data = self.call_rpc('venus/community/user/is_shadow', user_id=candidate)
            if data.get('user_id'):
                return candidate

    def get_user_id(self, id_, platform):
        """
        Return the local user id mapped to the crawled identifier ``id_``.

        :param id_: identifier from the crawled payload (the callers in this
            class pass author ids, topic/pictorial ids and image urls).
        :param platform: crawl platform, used as part of the cache key.
        :return: the user id as stored in the cache (converted with ``int``),
            or a newly picked random user id that is then cached.
        """
        cache_key = 'grap:{}:{}'.format(platform, id_)
        cached = ins_cache.get(cache_key)
        if cached:
            return int(cached)

        # Draw random users until one is found that has no reverse entry yet,
        # i.e. is not already assigned to another crawled id.
        while True:
            user_id = self.get_random_user_id()
            reverse_key = 'grap:{}:{}'.format(platform, user_id)
            if not ins_cache.get(reverse_key):
                ins_cache.set(reverse_key, id_)
                self.del_cache_keys.append(reverse_key)
                break

        ins_cache.set(cache_key, user_id)
        # NOTE(review): the original code registered the reverse key a second
        # time at this point and never registered ``cache_key``, so the forward
        # mapping survives ``del_cache``. Confirm whether ``cache_key`` should
        # be cleaned up as well.
        return user_id

    def get_image_size(self, image_url):
        """
        Fetch the width and height of an uploaded image.

        Requests ``image_url + IMAGE_SUFFIX + '?imageInfo'`` and reads the
        ``width`` / ``height`` fields of the JSON response.

        :return: ``(width, height)``; ``(None, None)`` when the request or the
            JSON decoding fails (the exception is logged).
        """
        try:
            url = image_url + IMAGE_SUFFIX + '?imageInfo'
            response = requests.request("GET", url)
            info = response.json()
            return info.get('width'), info.get('height')
        except Exception:
            logging_exception()
            # Always hand back a pair: the caller unpacks the result.
            return None, None

    def image_info(self, urls):
        """
        Upload every image in ``urls`` and collect its stored path and size.

        :param urls: iterable of source image urls.
        :return: list of ``{'url', 'height', 'width'}`` dicts, where ``url`` has
            the ``http://alpha.gmeiapp.com/`` prefix stripped.

        NOTE(review): both the upload and the size lookup are retried without
        limit until they succeed.
        """
        ret = []
        for url in urls:
            image_url = upload_image(url)
            while not image_url:
                image_url = upload_image(url)

            width, height = self.get_image_size(image_url)
            # Retry only while both dimensions are missing.
            while not width and not height:
                width, height = self.get_image_size(image_url)

            ret.append({
                'url': image_url.replace('http://alpha.gmeiapp.com/', ''),
                'height': height,
                'width': width,
            })
        return ret

    def revise_comments(self, comment, from_id):
        """
        Normalise one crawled top-level comment and flatten its replies.

        ``comment`` is modified in place: ``from_id`` and ``content`` are set
        and the ``reply`` list is removed. Each reply inherits ``from_id`` from
        the parent and gets ``reply_id`` / ``type`` from the parent's ``id`` /
        ``type``.

        :return: ``(comment, replies)``; ``replies`` is empty when the comment
            has none.
        """
        comment['from_id'] = from_id
        comment['content'] = comment.get('comment')
        replies = comment.pop('reply', None) or []
        for reply in replies:
            reply['from_id'] = comment.get('from_id')
            reply['reply_id'] = comment.get('id')
            reply['type'] = comment.get('type')
        return comment, list(replies)

    def create_comment(self, comment_list, from_id, platform, topic_id=None, pictorial_id=None):
        """
        Create one top-level comment and its replies through the crawl RPC.

        :param comment_list: dict describing a single crawled comment (with an
            optional ``reply`` list), despite the parameter name.
        :param from_id: crawled id of the item the comment belongs to.
        :param platform: crawl platform.
        :param topic_id: local topic id the comment is attached to, if any.
        :param pictorial_id: local pictorial id the comment is attached to, if any.
        :return: ``(error, data)``; ``(None, None)`` on success.
        """
        top_comment, comments = self.revise_comments(comment_list, from_id)
        top_comment['user_id'] = self.get_user_id(
            id_=top_comment.get('user').get('id'), platform=platform)
        top_comment.pop('user')

        error, ret = self.call_rpc(
            'venus/community/crawl/replys', data=[top_comment], platform=platform,
            topic_id=topic_id, pictorial_id=pictorial_id)
        if error:
            return error, ret

        # The replies are attached to the id of the comment just created.
        top_id = ret.get('reply_ids')[0]
        for obj in comments:
            obj['user_id'] = self.get_user_id(id_=obj.get('user').get('id'), platform=platform)
            obj.pop('user')

        error, _ = self.call_rpc(
            'venus/community/crawl/replys', data=comments, platform=platform,
            topic_id=topic_id, pictorial_id=pictorial_id, top_id=top_id)
        if error:
            return error, None
        return None, None

    def create_topic(self, topics, platform):
        """
        Create a standalone topic (with images and comments) per crawled item.

        Comments are only created when ``platform`` is
        ``GRAP_PLATFORM.XIAOHONGSHU``.

        :return: ``(error, data)`` of the first failing RPC, or ``(None, None)``.
        """
        for topic in topics:
            topic_comments = topic.pop('comments', None)
            images = topic.pop('image')
            topic['images'] = self.image_info(images)
            topic['user_id'] = self.get_user_id(id_=topic.get('id'), platform=platform)

            error, topic_obj = self.call_rpc(
                'venus/community/crawl/topic', data=topic, platform=platform, pictorial_id=None)
            if error:
                # The original returned an unassigned ``_`` here, which raised
                # UnboundLocalError instead of reporting the RPC error.
                return error, None

            if not topic_comments:
                continue
            if platform != GRAP_PLATFORM.XIAOHONGSHU:
                continue

            from_id = topic.get('id')
            for topic_comment in topic_comments:
                error, data = self.create_comment(
                    comment_list=topic_comment, from_id=from_id,
                    platform=platform, topic_id=topic_obj.get('id'))
                if error:
                    return error, data
        return None, None

    def create_pictorial(self, pictorial, platform):
        """
        Create a pictorial, one topic per image, and the pictorial's comments.

        :param pictorial: crawled item dict; it is modified in place
            (``comments`` / ``image`` removed, ``user_id`` / ``description`` /
            ``name`` added).
        :param platform: crawl platform.
        :return: ``(error, data)`` of the first failing RPC, or ``(None, None)``
            (also when ``pictorial`` is empty).
        """
        if not pictorial:
            return None, None

        pictorial_comments = pictorial.pop('comments', None)
        images = self.image_info(pictorial.pop('image'))

        # One topic per image; its id is the pictorial id followed by the
        # 1-based image index (``format`` also copes with a numeric id).
        topics = []
        for index, image in enumerate(images, 1):
            topics.append({
                'id': '{}{}'.format(pictorial.get('id'), index),
                'content': pictorial.get('content'),
                'images': [image],
                'create_time': pictorial.get('create_time'),
                'user_id': self.get_user_id(id_=image.get('url'), platform=platform),
            })

        pictorial['user_id'] = self.get_user_id(id_=pictorial.get('id'), platform=platform)
        content = pictorial.get('content')
        pictorial['description'] = content
        # The pictorial name is the first 20 characters of the crawled content.
        # A plain slice already handles shorter content without dropping its
        # last character.
        pictorial['name'] = content[:20]

        error, pictorial_obj = self.call_rpc(
            'venus/community/crawl/pictorial', data=pictorial, platform=platform)
        if error:
            return error, None
        pictorial_id = pictorial_obj.get('id')

        for obj in topics:
            error, _ = self.call_rpc(
                'venus/community/crawl/topic', data=obj, platform=platform,
                pictorial_id=pictorial_id)
            if error:
                return error, None

        if pictorial_comments and platform == GRAP_PLATFORM.XIAOHONGSHU:
            for pictorial_comment in pictorial_comments:
                error, data = self.create_comment(
                    comment_list=pictorial_comment, from_id=pictorial.get('id'),
                    platform=platform, pictorial_id=pictorial_id)
                if error:
                    return error, data
        return None, None

    def post(self, request):
        """
        Handle the crawl upload.

        POST fields:
          * ``topics``: JSON list of crawled items; each needs ``id``,
            ``content``, ``image``, ``create_time`` and ``user.id``.
          * ``platform``: integer platform, defaults to
            ``GRAP_PLATFORM.XIAOHONGSHU``.
          * ``is_pictorial``: when it parses to a non-zero integer every item
            is created as a pictorial, otherwise as a standalone topic.

        Returns ``self.error(...)`` for incomplete parameters or a failing RPC
        and ``self.ok()`` otherwise. Cache keys registered while processing are
        deleted before the handler returns, including when it raises.
        """
        topics = json.loads(request.POST.get('topics', '[]'))
        platform = int(request.POST.get('platform', GRAP_PLATFORM.XIAOHONGSHU))
        is_pictorial = request.POST.get('is_pictorial', None)

        if not topics:
            return self.error(self.get_ErrorInfo(ERROR.PARAMS_INCOMPLETE))

        required_fields = ('id', 'content', 'image', 'create_time')
        for topic in topics:
            incomplete = any(not topic.get(field) for field in required_fields)
            # A missing ``user`` counts as an incomplete parameter rather than
            # crashing with AttributeError.
            user = topic.get('user') or {}
            if incomplete or not user.get('id'):
                return self.error(self.get_ErrorInfo(ERROR.PARAMS_INCOMPLETE))

        if is_pictorial:
            is_pictorial = int(is_pictorial)

        # Track this request's cache keys separately from the class-level list
        # shared by all instances.
        self.del_cache_keys = []
        try:
            if is_pictorial:
                for obj in topics:
                    error, _ = self.create_pictorial(pictorial=obj, platform=platform)
                    if error:
                        return self.error(error=error)
            else:
                error, _ = self.create_topic(topics=topics, platform=platform)
                if error:
                    return self.error(error=error)
            return self.ok()
        finally:
            self.del_cache()