"""Serialize ``Answer`` rows (plus their question/tag metadata) into flat dicts.

Public entry points:

* ``get_answers(pks)``      -- full per-answer document.
* ``get_answers_rpc(pks)``  -- reduced ``{"key": ..., "value": ...}`` records.
* ``get_answer_ids(pks)``   -- ``get_answers_rpc`` for all online answers of
  the given question ids.
* ``get_user_type(user_id)`` -- coarse user classification.
"""
# NOTE: import order is kept exactly as in the original file on purpose;
# project modules may be sensitive to import order.
from qa.models.answer import Answer, Question
from qa.models.recommendscore import AnswerRelatedRecommendScore
from data_sync.utils import to_epoch, tzlc
from qa.models.answer import QuestionTag
from utils.rpc import get_rpc_invoker
from qa.models.toutiao import (
    by_content_type_id_get_keywords,
    get_content_star_keywords,
    get_content_title_keywords,
    get_content_star_first_keyword,
    has_service,
)
from tags.services.tag import (
    get_tagv3_analysis_info,
    get_tag_v3_operators_tags,
    get_tag_v3_anecdote_tags,
    get_tag_v3_anecdote_tag_ids,
    get_tag_v3_names_by_tag_v3_ids,
    get_gossip_tags,
    get_tag_v3_gossip_tag_ids,
)
from talos.services.convert_service.user_convert_service import UserConvertService
from data_sync.tractate.transfer import get_keynote_sentence
from gm_types.gaia import DOCTOR_TYPE, TAG_TYPE
from talos.services.doctor import DoctorService
import time
import datetime
from django.conf import settings
import redis
import logging
import re
from algorithm.text_classifical.base import model as alo_model
from gm_types.doris import SELECTED_CONTENT_TYPE

rpc = get_rpc_invoker()

# Matches any HTML/XML-looking tag; used to strip markup from answer content.
_HTML_TAG_RE = re.compile(r"<[^>]+>", re.S)

# Redis hash holding per-answer exposure counters (field = answer id).
_EXPOSURE_REDIS_KEY = "doris:content_exposure:answer"

# str(content_level) -> exposure count below which the answer still needs
# guaranteed exposure. Levels not listed here are never guaranteed.
_GUARANTEE_EXPOSURE_LIMITS = {
    "6": 500,
    "5": 400,
    "4": 300,
    "3.5": 200,
    "3": 100,
}

# Tag names that force ``show_by_index = 2`` when any of them is attached to
# the answer. The literal list (duplicates included) is kept as written in the
# original source; the frozenset gives O(1) membership tests.
_FILTER_TAG_NAMES = frozenset([
    "假体隆胸", "自体脂肪隆胸", "胶原蛋白填充隆胸", "玻尿酸填充隆胸", "胸部假体取出",
    "胸部失败修复", "胸部下垂矫正", "胸部修复", "胸形美化", "丰胸(隆胸)",
    "胸部塑身", "玻尿酸隆胸", "假体隆胸", "胶原蛋白隆胸", "埋线隆胸",
    "自体脂肪隆胸", "胸部注射物取出", "胸部假体取出", "胸部假体取出", "胸部修复",
    "脂肪胶隆胸", "胸部整形", "隆胸", "胸部美化", "缩胸",
    "胸部提升", "生胸毛", "胸部护理", "综合隆胸", "植胸毛",
    "胸部修复", "隆胸修复", "胸部护理", "硅胶隆胸", "胸部护理",
    "胸部修复", "胸部手术", "胸部提升", "生胸毛", "美胸",
    "胸毛", "胸部", "胸形", "胸部假体", "胸部kyc",
    "自体脂肪丰胸", "泰国隆胸", "泰国假体隆胸", "脂肪丰胸", "复合式隆胸",
    "隆胸假体", "丰胸(隆胸)", "美胸养成", "在线面诊 美胸名医", "为男友隆胸",
    "手术隆胸", "美胸创造营", "隆胸手术", "隆胸丰胸", "胸部下垂",
    '手术隆胸', "隆胸丰胸", "假体隆胸 成都罗通贵", "丰胸隆胸", "美胸套餐",
    "美胸护理", "自体脂肪填充胸部", "自体隆胸", "处女膜修复", "手术祛副乳",
    "吸脂祛副乳", "阴茎延长", "包皮手术", "阴茎增大", "阴道成形",
    "自体脂肪填充阴唇", "女性私密检查", "阴唇漂红", "G点注射", "激光脱私处毛发",
    "乳头缩小", "手术紧缩阴道", "激光紧缩阴道", "私密整形", "私密脱毛",
    "私密清洁", "阴唇美化", "阴道美化", "阴蒂美化", "处女膜修复",
    "处女膜", "女性高潮", "阴茎美化", "女性私密", "私密护理",
    "自体脂肪私密紧致", "盆底修复", "植私密毛发", "阴道紧缩", "私密清洁",
    "阴道修复", "阴唇整形", "阴蒂整形", "私密紧致", "私密修复",
    "玻尿酸丰阴唇", "胶原蛋白丰阴唇", "私密kyc", "阴道松弛", "阴蒂",
    "阴道", "处女膜", "私密美化", "阴蒂美化", "阴茎美化",
    "阴道成形", "私密紧致", "漂红", "私密手术", "男性私密",
    "女性私密", "脱私密毛发", "缩阴道激光", "生私密毛发", "私密健康",
    "阴道修复", "生殖系统", "女性私处", "男性私处", "阴茎",
    "阴唇", "处女膜", "私密部位", "胸形", "乳晕",
    "乳头", "乳腺", "副乳", "私密护理", "悦升私密紧收线",
    "私密整形", "私密其他", "处女膜修复", "阴道紧缩", "阴道成形",
    "包皮手术", "填充阴唇", "阴蒂肥大整形", "阴唇整形", "私密其他项目",
    "阴蒂提升", "小阴唇整形", "阴茎延长", "阴茎增粗", "女性私密",
    "男性私密", "切开缝合法缩紧阴道", "激光紧缩阴道", "女性私密保养品", "女性私密处保养",
    "D女郎缩阴产品", "私密整形1", "私处整形", "激光脱比基尼部位", "私密回春术",
    "缩阴术", "激光脱比基尼", "缩阴", "私处护理", "私处紧致",
    "私密超声提升.", "阴蒂整形", "激光阴道紧缩术",
])


def _recommend_scores(pks):
    """Return ``{answer_id: score}`` for the given answer ids.

    ``values_list`` yields ``(answer_id, score)`` tuples. The previous code
    iterated ``values(...)`` (which yields dicts) and unpacked each dict into
    its two *key names*, so the mapping never contained a real answer id.
    """
    rows = AnswerRelatedRecommendScore.objects.filter(
        answer_id__in=pks
    ).values_list('answer_id', 'score')
    return {answer_id: score for answer_id, score in rows}


def _tagv3_fields(content_id):
    """Return the tag-v3 analysis fields for one answer as a dict.

    Every field is an empty list when ``get_tagv3_analysis_info`` reports
    that no refresh is needed (first element of its result is falsy).
    """
    (need_refresh_data, second_demands_list, second_solutions_list,
     second_positions_list, second_demands_ids_list,
     second_solutions_ids_list, second_positions_ids_list,
     first_demands_ids_list, first_solutions_ids_list,
     first_positions_ids_list, first_demands_list, first_solutions_list,
     first_positions_list, project_tags_list, project_tags_ids_list,
     first_classify_ids_list, first_classify_names_list,
     second_classify_ids_list, second_classify_names_list
     ) = get_tagv3_analysis_info(content_id=content_id, content_type="answer")

    # (output key, source value) pairs, in the order the keys were originally
    # written into the item.
    sources = (
        ("tags_v3", project_tags_list),
        ("first_demands", first_demands_list),
        ("second_demands", second_demands_list),
        ("first_solutions", first_solutions_list),
        ("second_solutions", second_solutions_list),
        ("positions", first_positions_list),
        ("second_positions", second_positions_list),
        ("tagv3_ids", project_tags_ids_list),
        ("first_demands_ids", first_demands_ids_list),
        ("second_demands_ids", second_demands_ids_list),
        ("first_solutions_ids", first_solutions_ids_list),
        ("second_solutions_ids", second_solutions_ids_list),
        ("first_positions_ids", first_positions_ids_list),
        ("second_positions_ids", second_positions_ids_list),
        ("first_classify_ids", first_classify_ids_list),
        ("first_classify_names", first_classify_names_list),
        ("second_classify_ids", second_classify_ids_list),
        ("second_classify_names", second_classify_names_list),
    )
    fields = {}
    for key, value in sources:
        # list(...) is only applied when the data is used, as before.
        fields[key] = list(value) if need_refresh_data else []
    return fields


def _selected_content_type(all_tags):
    """Map the set of attached tag ids to a ``SELECTED_CONTENT_TYPE`` value.

    The checks are ordered; the first matching tag id wins.
    """
    if 15928 in all_tags:
        return SELECTED_CONTENT_TYPE.BEAUTY_STAR
    if 10682 in all_tags:
        return SELECTED_CONTENT_TYPE.STAR_GOSSIP
    if 15930 in all_tags:
        return SELECTED_CONTENT_TYPE.BEAUTY_CELEBRITY
    if 10683 in all_tags:
        return SELECTED_CONTENT_TYPE.CELEBRITY_GOSSIP
    return SELECTED_CONTENT_TYPE.BEAUTY_PROJECT


def _fill_selected_content(answer, item):
    """Populate the "selected" (homepage selection) fields of ``item`` in place.

    Homepage selection: content category, star list and influencer list.
    When the answer has tag-v4 names, the fields are derived from those tags;
    otherwise the text classifier ``alo_model`` is asked, best-effort.
    """
    tagv4 = answer.get_answer_tagv4_names(answer_id=answer.id)
    if tagv4:
        tags_info = answer.get_om_answer_tags_info(ids=tagv4)
        item['selected_stars'] = []
        item['selected_internet_celebrity'] = []
        all_tags = []
        for tag in tags_info:
            all_tags.append(tag.get("id", None))
            tag_type = tag.get("tag_type", 0)
            if tag_type == TAG_TYPE.STAR:
                item['selected_stars'].append(tag.get("name", None))
            elif tag_type == TAG_TYPE.INFLUENCER:
                item['selected_internet_celebrity'].append(tag.get("name", None))
            if tag.get("id") == 14288:
                item['operators_add_tags'].append(14288)
        item['selected_content_type'] = _selected_content_type(all_tags)
        return

    try:
        result_data = alo_model.run(answer.content)
        item['selected_content_type'] = int(result_data.get("content_type", 0))
        # Each entry is a single-key dict; the key is the name.
        item['selected_internet_celebrity'] = [
            list(entry.keys())[0] for entry in result_data.get("celebrity", [])
        ]
        item['selected_stars'] = [
            list(entry.keys())[0] for entry in result_data.get("star", [])
        ]
        projects = [
            list(entry.keys())[0] for entry in result_data.get("projects", [])
        ]
        item['portrait_tag_name'].extend(projects)
    except Exception:
        # Classification is best-effort: keep whatever fields were already
        # set and carry on, but leave a trace instead of failing silently.
        logging.warning(
            "alo_model classification failed for answer %s", answer.id,
            exc_info=True,
        )


def _build_answer_item(answer, rs_dict, redis_client):
    """Build the full document dict for one ``Answer``.

    :param answer: the ``Answer`` instance to serialize.
    :param rs_dict: ``{answer_id: recommend score}`` lookup.
    :param redis_client: Redis client used to read the exposure counter.
    :return: dict of fields for this answer.
    :raises Answer.DoesNotExist / Question.DoesNotExist: propagated from
        related-object access; the caller skips the answer in that case.
    """
    item = {}
    item['id'] = answer.id
    # NOTE(review): accessed without a call -- presumably properties; confirm.
    item['good_click'] = answer.get_good_click
    item['goodclick_rate_30'] = answer.con_good_click
    item['user_id'] = answer.user_id
    item['doctor_id'] = DoctorService.get_doctor_by_user_id_v1(answer.user_id)

    question = answer.question
    item['question_id'] = question.id
    item['smart_rank'] = answer.smart_rank()
    item['title'] = question.title
    item['desc'] = question.content
    item['question_type'] = question.question_type

    # Round-trip through UTF-16 with 'surrogatepass' so surrogate code points
    # in the stored text can be encoded, then strip HTML-like tags.
    normalized = answer.content.encode('utf-16', 'surrogatepass').decode('utf-16')
    item['answer'] = _HTML_TAG_RE.sub("", normalized)

    # The level may be fractional (e.g. "3.5" is compared against below), so
    # always go through float() before truncating to int.
    level_int = int(float(answer.level))
    item['keynote_sentence'] = (
        get_keynote_sentence(answer.content) if level_int >= 3 else ""
    )
    item['content_length'] = len(item['answer'])
    item['has_picture'] = answer.get_has_picture()
    item["is_online"] = bool(answer.is_online and question.is_online)
    item['recommend_score'] = rs_dict.get(answer.id, 0)

    tag_ids = [t.tag_id for t in question.tags]
    item['tag_ids'] = tag_ids
    # Separate copy so helpers never share the list stored in the item.
    tags_id_list = list(tag_ids)
    qt = QuestionTag()
    item['tag_name'] = qt.get_name_list(tags_id_list)
    item['tag_name_analyze'] = qt.get_name_list(tags_id_list)
    item['portrait_tag_name'] = qt.get_project_tags(tag_list=tag_ids)

    item["hot_score"] = answer.get_hot_score_answer()
    item['new_smr'] = answer.get_new_smart_rank_score(answer_id=answer.id)
    score = answer.get_search_new_smart_rank_score(answer_id=answer.id)
    item['search_new_smr'] = score.get("smart_rank_score", 0)
    item['detail_smr'] = score.get('smart_rank_v2', 0)
    community_hot_score = answer.get_community_answer_hot_score(answer.id)
    item['new_goodclicks'] = community_hot_score.get('hot_score', 0)
    item['smart_rank_v2'] = answer.get_new_smart_rank_score(answer_id=answer.id)
    item['smart_rank_v3'] = answer.get_new_smart_rank_score(answer_id=answer.id)
    item['platform'] = answer.platform

    # New-style tags: map the old tag ids to (id, name) pairs via RPC.
    fresh_tag_result = rpc["api/agile_tag/tuple_new_tags"](old_tag_ids=tags_id_list)
    fresh_tag_id_list = []
    fresh_tag_name_list = []
    for fresh_tag_id, fresh_tag_name in fresh_tag_result.unwrap():
        fresh_tag_id_list.append(fresh_tag_id)
        fresh_tag_name_list.append(fresh_tag_name)
    item["fresh_tag_ids"] = fresh_tag_id_list
    item["fresh_tag_name"] = fresh_tag_name_list

    item['content_level'] = answer.level
    item['content_type'] = answer.content_type
    item['create_time'] = tzlc(answer.create_time)
    item['update_time_stratific'] = answer.get_answer_update_time_stratific(
        answer.get_last_any_reply_time()
    )
    item['create_time_epoch'] = to_epoch(tzlc(answer.create_time))
    item['is_recommend'] = answer.is_recommend
    item['title_pre'] = question.title
    item['answer_pre'] = ""
    item['desc_pre'] = ""
    item["last_any_reply_time"] = tzlc(answer.get_last_any_reply_time())

    content_keyword = by_content_type_id_get_keywords(id=answer.id, content_type="answer")
    item["content_keyword"] = content_keyword
    item["content_star_keyword"] = get_content_star_keywords(
        id=answer.id, content_type="answer"
    )
    item["content_star_first_keyword"] = get_content_star_first_keyword(
        id=answer.id, content_type="answer"
    )
    # The has_service(tag_ids, content_keyword) computation is disabled in
    # this file; the field is always False.
    item["has_service"] = False
    item['operators_add_tags'] = get_tag_v3_operators_tags(
        content_id=answer.id, content_type="answer"
    )
    item['anecdote_tags'] = get_tag_v3_anecdote_tags(
        content_id=answer.id, content_type="answer"
    )
    item['anecdote_tag_ids'] = get_tag_v3_anecdote_tag_ids(
        content_id=answer.id, content_type="answer"
    )

    item.update(_tagv3_fields(item["id"]))

    item["user_type"] = get_user_type(answer.user_id)
    item['reply_number'] = answer.nofake_comment_num(answer.id)
    item['vote_number'] = answer.nofake_vote_number(answer.id)
    item['is_gte_3_content_level'] = level_int >= 3
    reply_vote_score = 0.7 * int(item['reply_number']) + 0.3 * int(item['vote_number'])
    item['sort_score'] = reply_vote_score + level_int
    item['reply_vote_emt'] = reply_vote_score
    item["latest_interaction_time"] = answer.get_answer_latest_interaction_time(
        item["is_online"], item["content_level"], item["create_time"]
    )

    # Later of creation time and last reply time, as a local-time epoch.
    item['latest_create_or_reply_time'] = int(
        time.mktime(item['create_time'].timetuple())
    )
    if item['last_any_reply_time'] and item['last_any_reply_time'] > item['create_time']:
        item['latest_create_or_reply_time'] = int(
            time.mktime(item['last_any_reply_time'].timetuple())
        )

    # Content exposure-guarantee field.
    days_past_value = (tzlc(datetime.datetime.today()) - item['create_time']).days
    redis_exposure_val = redis_client.hget(_EXPOSURE_REDIS_KEY, item["id"])
    if redis_exposure_val:
        exposure = int(redis_exposure_val)
        limit = _GUARANTEE_EXPOSURE_LIMITS.get(str(item["content_level"]))
        item["is_need_guarantee"] = limit is not None and exposure < limit
    elif days_past_value <= 1:
        # Treated as content created today that has had no exposure yet.
        item["is_need_guarantee"] = True
    # NOTE(review): for older content without an exposure counter the key is
    # intentionally left unset, as in the original (the "force False" branch
    # was commented out there) -- confirm consumers tolerate a missing key.

    item["has_video"] = Answer.has_video(answer)
    item["is_gif"] = answer.cover_is_dynamic

    # Check whether any operator-added tag is a gossip tag.
    operators_add_tags_names = get_tag_v3_names_by_tag_v3_ids(item['operators_add_tags'])
    gossip_tag_names = get_gossip_tags()
    item["is_gossip"] = bool(
        set(operators_add_tags_names).intersection(set(gossip_tag_names))
    )
    item["gossip_tag_ids"] = get_tag_v3_gossip_tag_ids(
        content_id=answer.id, content_type="answer"
    )
    item["gossip_tags"] = get_tag_v3_gossip_tag_ids(
        content_id=answer.id, content_type="answer", get_names=True
    )

    _fill_selected_content(answer, item)

    if 3315 in item['operators_add_tags']:
        item['new_smr'] = answer.get_answer_newuser_smr(answer_id=answer.id)

    # Re-resolve the names: _fill_selected_content may have appended a tag id
    # to item['operators_add_tags'].
    operators_add_tags_names = get_tag_v3_names_by_tag_v3_ids(item['operators_add_tags'])
    answer_all_tag_names = (
        item["tags_v3"] + item["first_demands"] + item["second_demands"]
        + item["first_solutions"] + item["second_solutions"]
        + item["positions"] + item["second_positions"]
        + item['tag_name'] + item['fresh_tag_name']
        + item["operators_add_tags"] + item["anecdote_tags"]
        + item["portrait_tag_name"] + item['gossip_tags']
        + item['second_classify_names'] + item['first_classify_names']
        + operators_add_tags_names
    )
    if not _FILTER_TAG_NAMES.isdisjoint(answer_all_tag_names):
        # 2: the answer carries at least one tag from the filter list.
        item['show_by_index'] = 2
    else:
        item['show_by_index'] = answer.get_answer_show_by_index(answer.id)
    return item


def get_answers(pks):
    """Return the full document dict for every answer whose id is in ``pks``.

    Answers are read from the ``"slave"`` database alias. An answer whose
    related ``Answer``/``Question`` lookup raises ``DoesNotExist`` is skipped.

    :param pks: iterable of answer ids.
    :return: list of dicts, one per successfully serialized answer.
    """
    answers = Answer.objects.using("slave").filter(id__in=pks)
    rs_dict = _recommend_scores(pks)
    # One client for the whole batch instead of one per answer.
    redis_client = redis.StrictRedis.from_url(settings.DORIS_URL)

    data = []
    for answer in answers:
        try:
            item = _build_answer_item(answer, rs_dict, redis_client)
            data.append(item)
            # NOTE(review): this logs the whole accumulated list on every
            # iteration, as the original did; lazy %-args avoid the
            # formatting cost when INFO is disabled.
            logging.info("get data:%s", data)
        except (Answer.DoesNotExist, Question.DoesNotExist):
            # Related row vanished: skip this answer, keep the rest.
            continue
    return data


def get_user_type(user_id):
    """Classify a user.

    Return values:

    * 0 -- doctor
    * 1 -- officer
    * 2 -- "daren" (non-zero ``membership_level``)
    * 3 -- ordinary user
    * 4 -- the lookup or classification raised an exception

    :param user_id: id of the user to classify.
    :return: int in ``0..4`` as described above.
    """
    try:
        user_info = UserConvertService.get_user_info_by_user_id(user_id=user_id)
        if "doctor_id" in user_info and user_info["doctor_type"] == DOCTOR_TYPE.DOCTOR:
            return 0
        if "hospital_id" in user_info and user_info["doctor_type"] == DOCTOR_TYPE.OFFICER:
            return 1
        if "membership_level" in user_info and user_info["membership_level"] != 0:
            return 2
        return 3
    except Exception:
        # Best-effort: any failure maps to the "unknown" type. Unlike the
        # former bare ``except:``, this no longer swallows KeyboardInterrupt
        # or SystemExit.
        return 4


def get_answer_ids(pks):
    """Return ``get_answers_rpc`` records for all online answers of questions.

    :param pks: iterable of *question* ids.
    :return: whatever ``get_answers_rpc`` returns for the collected answer ids.
    """
    questions = Question.objects.filter(id__in=pks)
    ids = []
    for question in questions:
        ids.extend(
            question.answers.filter(is_online=True).values_list('id', flat=True)
        )
    return get_answers_rpc(ids)


def get_answers_rpc(pks):
    """Return reduced ``{"key": {"id": ...}, "value": {...}}`` records.

    An answer whose related ``Answer``/``Question`` lookup raises
    ``DoesNotExist`` is skipped.

    :param pks: iterable of answer ids.
    :return: list of ``{"value": item, "key": {"id": answer_id}}`` dicts.
    """
    answers = Answer.objects.filter(id__in=pks)
    rs_dict = _recommend_scores(pks)

    data = []
    for answer in answers:
        try:
            question = answer.question
            item = {
                'id': answer.id,
                'user_id': answer.user_id,
                'question_id': question.id,
                'smart_rank': answer.smart_rank(),
                'title': question.title,
                'desc': question.content,
                'question_type': question.question_type,
                'answer': answer.content,
                'is_online': bool(answer.is_online and question.is_online),
                'recommend_score': rs_dict.get(answer.id, 0),
                'tag_ids': [t.tag_id for t in question.tags],
                'content_level': answer.level,
                'content_type': answer.content_type,
                'create_time': answer.create_time.timestamp(),
                'create_time_epoch': to_epoch(tzlc(answer.create_time)),
                'is_recommend': answer.is_recommend,
            }
            data.append({'value': item, 'key': {'id': answer.id}})
        except (Answer.DoesNotExist, Question.DoesNotExist):
            # Related row vanished: skip this answer, keep the rest.
            continue
    return data