# coding=utf-8
"""Build search-index documents for questions and compute their ranking scores."""
import datetime
from math import ceil
import logging

from qa.models.answer import ApiAnswerScore
from django.conf import settings
from data_sync.utils import to_epoch
from data_sync.utils import tzlc
from qa.models.answer import QuestionTag, Question, Answer, AnswerVote, AnswerReply, QuestionAnswer
from utils.rpc import get_rpc_invoker
from qa.models.toutiao import by_content_type_id_get_keywords, get_content_star_keywords, get_content_title_keywords, \
    get_content_star_first_keyword
from tags.services.tag import (get_tagv3_analysis_info, get_tagv3_ids_by_tagv3_names,
                               get_first_demand_ids_by_name, get_second_demand_ids_by_name,
                               get_first_position_ids_by_name, get_second_position_ids_by_name,
                               get_first_solution_ids_by_name, get_second_solution_ids_by_name,
                               get_tag_v3_operators_tags)

rpc = get_rpc_invoker()
logger = logging.getLogger(__name__)

# Question ids that are temporarily excluded from syncing.
_EXCLUDED_QUESTION_IDS = (230221, 230222, 230223, 230224, 230225, 230255, 230256, 230257, 230323)


def get_score(result):
    """Return a ranking score for a question document.

    ``result`` is a dict providing ``create_time_epoch``,
    ``answer_last_update_time_epoch``, ``answer_likes_num``, ``answers_num``,
    ``views_num`` and ``answer_views_num``. The weights are read from
    ``settings.QUESTION_SCORE_WEIGHT``.
    """
    weights = dict(settings.QUESTION_SCORE_WEIGHT)
    now = datetime.datetime.now().timestamp()

    # Ages in whole days (rounded up) of the question and of its latest answer update.
    t1 = ceil((now - result['create_time_epoch']) / 86400)
    t2 = ceil((now - result['answer_last_update_time_epoch']) / 86400)
    time_score = t1 - pow((t1 - t2), 1.5) / 2

    # NOTE(review): max(100, x) is a floor of 100, not a cap; confirm min() was not intended.
    question_answer_likes_score = max(100, result['answer_likes_num'] / 4)
    answers_num_score = max(100, result['answers_num'] * 2)
    question_view_score = max(100, ceil(result['views_num'] / 20))
    question_score = sum([
        answers_num_score * weights['answers_num_score_weight'],
        question_answer_likes_score * weights['question_answer_likes_score_weight'],
        question_view_score * weights['question_view_score_weight'],
    ]) / (time_score * weights['time_score_weight'])

    answer_likes_score = max(100, result['answer_likes_num'] / 2)
    answer_views_score = max(100, ceil(result['answer_views_num'] / 20))
    answer_score = sum([
        answer_likes_score * weights['answer_likes_score_weight'],
        answer_views_score * weights['answer_views_score_weight'],
    ])
    return question_score + answer_score


class Score(object):
    """Score a question from its recommended answer, answer count and age."""

    @classmethod
    def get_score(cls, question):
        """Return the non-negative score of ``question``.

        The score combines the first recommended answer (content level,
        non-fake votes, replies, minus a time penalty) with a score derived
        from ``question.answer_num``, minus a question time penalty. Negative
        intermediate or final values are logged and clamped to 0.
        """
        now = datetime.datetime.now()
        # NOTE(review): timedelta.seconds excludes whole days, so these penalties
        # are computed from the time-of-day remainder only; confirm whether
        # total_seconds() was intended before changing ranking behaviour.
        question_age_hours = (now - question.create_time).seconds / 3600

        answer_score = 0
        rec = question.answers.filter(is_recommend=True).first()
        if rec:
            vote_num = rec.answervote_set.filter(is_fake=False).count()
            content_score = cls.get_answer_content_score(rec.level)
            social_score = cls.get_social_score(vote_num, rec.replys.count())
            time_score = question_age_hours * 0.03 * 0.7 + \
                (now - rec.create_time).seconds / 3600 * 0.06 * 1.5
            answer_score = 0.8 * content_score + 0.2 * social_score - time_score
            if answer_score < 0:
                logger.warning('answer score < 0, question id:{}, score:{}'.format(question.id, answer_score))
            answer_score = max(0, answer_score)

        answer_count_score = cls.get_answer_count_score(question.answer_num)
        question_time_score = question_age_hours * 0.03
        a = answer_score * 0.8 + answer_count_score * 0.2 - question_time_score
        if a < 0:
            logger.warning('question score < 0, question id:{}, score:{}'.format(question.id, a))
        return max(0, a)

    @staticmethod
    def get_answer_content_score(level):
        """Map an answer ``level`` to a content score (0, 5, 10, 70 or 100)."""
        if level < 2:
            return 0
        elif level < 3:
            return 5
        elif level < 4:
            return 10
        elif level < 5:
            return 70
        else:
            return 100

    @staticmethod
    def get_social_score(likes_num, reply_num):
        """Return 0.4 * likes score + 0.6 * reply score."""
        likes_score = Score.get_likes_score(likes_num)
        reply_score = Score.get_reply_score(reply_num)
        return 0.4 * likes_score + 0.6 * reply_score

    @staticmethod
    def _count_bucket_score(count):
        """Map a like/reply count to its bucket score (shared by both counters)."""
        if count <= 5:
            return 10
        elif count <= 20:
            return 20
        elif count <= 50:
            return 30
        elif count <= 70:
            return 60
        elif count <= 100:
            return 70
        else:
            return 100

    @staticmethod
    def get_likes_score(likes_num):
        """Return the bucket score for a number of likes."""
        return Score._count_bucket_score(likes_num)

    @staticmethod
    def get_reply_score(reply_num):
        """Return the bucket score for a number of replies."""
        return Score._count_bucket_score(reply_num)

    @staticmethod
    def get_answer_count_score(answer_count):
        """Return 30 for up to 2 answers, 50 for fewer than 6, otherwise 70."""
        if answer_count <= 2:
            return 30
        elif answer_count < 6:
            return 50
        else:
            return 70


def get_questions(pks):
    """Build one document dict per question whose id is in ``pks``.

    Ids listed in ``_EXCLUDED_QUESTION_IDS`` are dropped first. User, tag and
    doctor details are fetched through ``rpc``; answers, replies and answer
    scores are read through the ORM.

    Returns a list of dicts, or ``None`` when no question matches.
    """
    results = []

    # Work on a copy so the caller's collection is not mutated.
    pks = list(pks)
    for question_id in _EXCLUDED_QUESTION_IDS:
        if question_id in pks:
            pks.remove(question_id)

    queryset = Question.objects.filter(id__in=pks)
    if not queryset:
        # NOTE(review): returns None rather than []; kept as-is for existing callers.
        return None

    tag_ids = list(QuestionTag.objects.filter(question__in=queryset).values_list('tag', flat=True))
    user_ids = list(queryset.values_list('user', flat=True))

    # All three RPC calls are issued before any result is unwrapped.
    users = rpc['api/user/get_fundamental_info_by_user_ids'](user_ids=user_ids)
    tags = rpc['api/tag/info_by_ids'](tag_ids=tag_ids)
    doctors = rpc['doctor/user/get_doctors'](user_ids=user_ids)

    user_dict = {str(user['id']): user for user in users.unwrap()}
    tag_dict = {str(tag['id']): tag for tag in tags.unwrap()}
    doctor_list = doctors.unwrap()['doctors']
    doctor_dict = {str(doctor.get('user') or doctor.get('user_id')): doctor for doctor in doctor_list}

    for q in queryset:
        res = {
            'id': q.id,
            'create_time': tzlc(q.create_time),
            'create_time_epoch': to_epoch(tzlc(q.create_time)),
            'has_cover': bool(q.cover_url),
            'title': q.title,
            'content': q.content,
            'is_online': q.is_online,
            'is_recommend': q.is_recommend,
            'answers': [],
            'answer_likes_num': 0,
            'answer_views_num': 0,
            'answers_num': 0,
            'has_recommended_answer': False,
            'views_num': q.view_amount,
            'like_num': q.like_num,
            'content_type': q.content_type,
        }

        # Best answer is optional: a failed lookup must not abort the whole sync.
        try:
            answer_id = list(QuestionAnswer.objects.filter(question_id=q.id).values_list('answer_id', flat=True))
            if answer_id:
                res['best_answer'] = int(answer_id[0])
        except Exception:
            logger.exception('failed to load best answer, question id:{}'.format(q.id))

        # tag
        tag_ids = [
            t for t in QuestionTag.objects.filter(question=q).values_list('tag', flat=True)
            if str(t) in tag_dict
        ]
        res['tags'] = [tag_dict[str(tag_id)]['name'] for tag_id in tag_ids]
        res['tag_ids'] = tag_ids
        # Left as an RPC result here; unwrapped into ids after all questions are built.
        res['closure_tag_ids'] = rpc['api/tag/closure_tags'](tag_ids=tag_ids) if tag_ids else []

        fresh_tag_result = rpc["api/agile_tag/tuple_new_tags"](old_tag_ids=tag_ids)
        fresh_tag_id_list = list()
        fresh_tag_name_list = list()
        for fresh_tag_id, fresh_tag_name in fresh_tag_result.unwrap():
            fresh_tag_id_list.append(fresh_tag_id)
            fresh_tag_name_list.append(fresh_tag_name)
        res["fresh_tags"] = fresh_tag_name_list
        res["fresh_closure_tag_ids"] = fresh_tag_id_list
        res["fresh_tag_ids"] = fresh_tag_id_list

        # user
        user = user_dict.get(str(q.user_id), {})
        is_doctor = str(q.user_id) in doctor_dict
        user_info = {
            'id': q.user_id,
            'is_doctor': is_doctor,
            'last_name': user.get('nickname', ''),
        }
        res['user'] = user_info

        if is_doctor:
            doctor_data = doctor_dict.get(str(q.user_id))
            org_sink_data = doctor_data.get("org_sink")
            doctor = {}
            doctor['id'] = doctor_data.get("id")
            doctor['name'] = doctor_data.get("name")
            # Organisation penalty sink window.
            if org_sink_data and org_sink_data[0]:
                sink = org_sink_data[0]
                sink_start = sink['org_sink_start_time']
                # The end time used to be read from the start-time key. Use the
                # end-time key when it is present, otherwise keep the old value.
                sink_end = sink.get('org_sink_end_time')
                if sink_end is None:
                    sink_end = sink_start
                res['org_sink_start_time'] = tzlc(datetime.datetime.fromtimestamp(sink_start))
                res['org_sink_end_time'] = tzlc(datetime.datetime.fromtimestamp(sink_end))
            res['doctor'] = doctor

        for key in ('city_tag_id', 'city_province_tag_id', 'city_province_country_tag_id'):
            if key in user:
                user_info[key] = user[key]

        all_answer_ids = []
        all_smk = 0
        answers = q.answers.order_by('-update_time')
        last_update_time = q.update_time
        res['answer_last_update_time_epoch'] = res['create_time_epoch']
        last_update_time_answer_reply = q.create_time
        for answer in answers:
            # Skip offline answers, and high-level answers of excluded questions.
            if answer.is_online == False or (  # noqa: E712 - keeps the original comparison semantics
                    res['id'] in _EXCLUDED_QUESTION_IDS and answer.level > 3):
                continue

            if answer.is_recommend:
                res['has_recommended_answer'] = True

            all_answer_ids.append(answer.id)
            if answer.is_online:
                res['answers'].append({
                    'id': answer.id,
                    'level': answer.level,
                    'content': answer.content,
                    'is_online': answer.is_online,
                    'is_recommend': answer.is_recommend,
                    'create_time': tzlc(answer.create_time),
                })

            # Track the most recent activity among the question, its answers and their replies.
            reply = AnswerReply.objects.filter(answer_id=answer.id).order_by('-create_time').first()
            if reply:
                last_update_time_answer_reply = max(answer.create_time, last_update_time_answer_reply,
                                                    reply.create_time)
            else:
                last_update_time_answer_reply = max(answer.create_time, last_update_time_answer_reply)

            res['answer_likes_num'] += answer.like_num
            res['answer_views_num'] += answer.view_amount
            res['answers_num'] += 1

        res["last_update_time_answer_reply"] = tzlc(last_update_time_answer_reply)

        # Sum the stored score of every collected answer.
        for answer_id in all_answer_ids:
            result = ApiAnswerScore.objects.using(settings.DORIS_DB_NAME).filter(answer_id=answer_id).first()
            all_smk += result.new_score if result else 0.0
        res['all_smk'] = all_smk

        # Use the newest answer's creation time when the question has any answer.
        latest_answer = q.answers.order_by("-create_time").first()
        if latest_answer is not None:
            last_update_time = latest_answer.create_time

        res['last_update_time'] = tzlc(last_update_time)
        res['answer_last_update_time_epoch'] = to_epoch(res['last_update_time'])

        question_lastest_answer_time = last_update_time if res["answers_num"] else q.create_time
        res["question_lastest_answer_time"] = tzlc(question_lastest_answer_time)

        res["content_keyword"] = by_content_type_id_get_keywords(id=q.id, content_type="question")
        # The star / star-first / title keyword fields are currently disabled.

        # Miscellaneous fields.
        res['in_whitelist'] = False
        res['in_multitopic'] = False
        res['question_type'] = q.question_type
        res['score'] = Score.get_score(q)
        res['operators_add_tags'] = get_tag_v3_operators_tags(content_id=q.id, content_type="question")

        (need_refresh_data, second_demands_list, second_solutions_list, second_positions_list,
         second_demands_ids_list,
         second_solutions_ids_list, second_positions_ids_list,
         first_demands_ids_list, first_solutions_ids_list, first_positions_ids_list, first_demands_list,
         first_solutions_list, first_positions_list,
         project_tags_list, project_tags_ids_list, first_classify_ids_list, first_classify_names_list,
         second_classify_ids_list, second_classify_names_list) = get_tagv3_analysis_info(content_id=res["id"],
                                                                                         content_type="question")

        if need_refresh_data:
            # These fields belong on the question document (res). They used to be
            # written to a stale loop variable, which raised instead of storing them.
            res["tags_v3"] = list(project_tags_list)
            res["first_demands"] = list(first_demands_list)
            res["second_demands"] = list(second_demands_list)
            res["first_solutions"] = list(first_solutions_list)
            res["second_solutions"] = list(second_solutions_list)
            res["positions"] = list(first_positions_list)
            res["second_positions"] = list(second_positions_list)
            res["tagv3_ids"] = list(project_tags_ids_list)
            res["first_demands_ids"] = list(first_demands_ids_list)
            res["second_demands_ids"] = list(second_demands_ids_list)
            res["first_solutions_ids"] = list(first_solutions_ids_list)
            res["second_solutions_ids"] = list(second_solutions_ids_list)
            res["first_positions_ids"] = list(first_positions_ids_list)
            res["second_positions_ids"] = list(second_positions_ids_list)
            res["first_classify_ids"] = list(first_classify_ids_list)
            res["first_classify_names"] = list(first_classify_names_list)
            res["second_classify_ids"] = list(second_classify_ids_list)
            res["second_classify_names"] = list(second_classify_names_list)

        results.append(res)

    # Replace pending closure-tag RPC results with plain lists of tag ids.
    for result in results:
        if not isinstance(result['closure_tag_ids'], list):
            result['closure_tag_ids'] = [tag['id'] for tag in result['closure_tag_ids'].unwrap()]

    return results