# tran2es.py 14.2 KB
# coding=utf-8
import datetime
from math import ceil
import logging
from qa.models.answer import ApiAnswerScore
from django.conf import settings
from data_sync.utils import to_epoch
from data_sync.utils import tzlc
from qa.models.answer import QuestionTag, Question, Answer, AnswerVote, AnswerReply, QuestionAnswer
from utils.rpc import get_rpc_invoker
from qa.models.toutiao import by_content_type_id_get_keywords, get_content_star_keywords, get_content_title_keywords, \
    get_content_star_first_keyword
from tags.services.tag import (get_tagv3_analysis_info, get_tagv3_ids_by_tagv3_names,
                               get_first_demand_ids_by_name, get_second_demand_ids_by_name,
                               get_first_position_ids_by_name, get_second_position_ids_by_name,
                               get_first_solution_ids_by_name, get_second_solution_ids_by_name,
                               get_tag_v3_operators_tags)

# RPC invoker used by get_questions() for the user, tag and doctor lookups.
rpc = get_rpc_invoker()

# Module-level logger; Score.get_score() uses it to warn about negative scores.
logger = logging.getLogger(__name__)


def get_score(result):
    """Combine question and answer statistics into a single ranking number.

    The weights are read from ``settings.QUESTION_SCORE_WEIGHT`` on every
    call.

    Args:
        result: mapping with ``create_time_epoch`` and
            ``answer_last_update_time_epoch`` (epoch seconds) plus the
            counters ``answer_likes_num``, ``answers_num``, ``views_num`` and
            ``answer_views_num``.

    Returns:
        The weighted question score divided by the weighted time score, plus
        the weighted answer score.

    NOTE(review): every component is computed with ``max(100, ...)``, so 100
    is a lower bound rather than a cap -- confirm that is intended.
    NOTE(review): the time score can be zero (e.g. 4 days old, answered
    "now") or negative, which raises ZeroDivisionError or flips the sign of
    the question part; a negative day difference makes ``pow(..., 1.5)``
    return a complex number.
    """
    weights = dict(settings.QUESTION_SCORE_WEIGHT)
    current_ts = datetime.datetime.now().timestamp()
    seconds_per_day = 86400

    # Ages are measured in whole days, rounded up.
    days_since_created = ceil((current_ts - result['create_time_epoch']) / seconds_per_day)
    days_since_answered = ceil((current_ts - result['answer_last_update_time_epoch']) / seconds_per_day)
    day_gap = days_since_created - days_since_answered
    time_score = days_since_created - pow(day_gap, 1.5) / 2

    # --- question part -----------------------------------------------------
    likes_for_question = max(100, result['answer_likes_num'] / 4)
    answer_count_component = max(100, result['answers_num'] * 2)
    view_component = max(100, ceil(result['views_num'] / 20))

    weighted_question_parts = [
        answer_count_component * weights['answers_num_score_weight'],
        likes_for_question * weights['question_answer_likes_score_weight'],
        view_component * weights['question_view_score_weight'],
    ]
    weighted_time = time_score * weights['time_score_weight']
    question_part = sum(weighted_question_parts) / weighted_time

    # --- answer part -------------------------------------------------------
    likes_for_answer = max(100, result['answer_likes_num'] / 2)
    answer_view_component = max(100, ceil(result['answer_views_num'] / 20))

    weighted_answer_parts = [
        likes_for_answer * weights['answer_likes_score_weight'],
        answer_view_component * weights['answer_views_score_weight'],
    ]
    answer_part = sum(weighted_answer_parts)

    return question_part + answer_part


class Score(object):
    """Heuristic ranking score for a question and its recommended answer.

    All methods are class/static methods; the class only groups the rules.
    """

    # (inclusive upper bound, score) steps shared by likes and replies;
    # anything above the last bound scores 100.
    _SOCIAL_BUCKETS = ((5, 10), (20, 20), (50, 30), (70, 60), (100, 70))

    @staticmethod
    def _elapsed_hours(start, end):
        """Return the full elapsed time between two datetimes, in hours.

        Uses ``total_seconds()``: ``timedelta.seconds`` only holds the
        sub-day remainder (0..86399), so it would reset every 24 hours.
        """
        return (end - start).total_seconds() / 3600

    @staticmethod
    def _bucket_score(count):
        """Map a like/reply count onto its step in ``_SOCIAL_BUCKETS``."""
        for upper_bound, score in Score._SOCIAL_BUCKETS:
            if count <= upper_bound:
                return score
        return 100

    @classmethod
    def get_score(cls, question):
        """Compute the non-negative ranking score of ``question``.

        The score mixes the recommended answer's score (80%) with a score
        for the number of answers (20%), minus a penalty that grows with the
        age of the question. Negative intermediate or final values are
        logged and clamped to 0.

        Args:
            question: object exposing ``id``, ``create_time``, ``answer_num``
                and an ``answers`` manager whose items expose ``level``,
                ``create_time``, ``answervote_set`` and ``replys``.

        Returns:
            A number >= 0.

        NOTE(review): ``datetime.now()`` is naive, so this assumes the
        ``create_time`` values are naive as well -- confirm against the
        project's timezone settings.
        """
        now = datetime.datetime.now()
        question_age_hours = cls._elapsed_hours(question.create_time, now)
        answer_score = 0

        # A single .first() both tests for existence and fetches the row.
        rec = question.answers.filter(is_recommend=True).first()
        if rec is not None:
            vote_num = rec.answervote_set.filter(is_fake=False).count()
            content_score = cls.get_answer_content_score(rec.level)
            social_score = cls.get_social_score(vote_num, rec.replys.count())
            answer_age_hours = cls._elapsed_hours(rec.create_time, now)
            time_score = question_age_hours * 0.03 * 0.7 + \
                answer_age_hours * 0.06 * 1.5

            answer_score = 0.8 * content_score + 0.2 * social_score - time_score

        if answer_score < 0:
            logger.warning('answer score < 0, question id:{}, score:{}'.format(question.id, answer_score))

        answer_score = max(0, answer_score)

        answer_count_score = cls.get_answer_count_score(question.answer_num)
        question_time_score = question_age_hours * 0.03

        a = answer_score * 0.8 + answer_count_score * 0.2 - question_time_score
        if a < 0:
            logger.warning('question score < 0, question id:{}, score:{}'.format(question.id, a))
        return max(0, a)

    @staticmethod
    def get_answer_content_score(level):
        """Translate an answer ``level`` into a content score (0..100)."""
        if level < 2:
            return 0
        elif level < 3:
            return 5
        elif level < 4:
            return 10
        elif level < 5:
            return 70
        else:
            return 100

    @staticmethod
    def get_social_score(likes_num, reply_num):
        """Blend the likes score (40%) and the reply score (60%)."""
        likes_score = Score.get_likes_score(likes_num)
        reply_score = Score.get_reply_score(reply_num)
        return 0.4 * likes_score + 0.6 * reply_score

    @staticmethod
    def get_likes_score(likes_num):
        """Return the stepped score for a number of likes."""
        return Score._bucket_score(likes_num)

    @staticmethod
    def get_reply_score(reply_num):
        """Return the stepped score for a number of replies."""
        return Score._bucket_score(reply_num)

    @staticmethod
    def get_answer_count_score(answer_count):
        """Return 30, 50 or 70 depending on how many answers exist."""
        if answer_count <= 2:
            return 30
        elif answer_count < 6:
            return 50
        else:
            return 70


# Question IDs that are temporarily left out of the export.
_EXCLUDED_QUESTION_IDS = frozenset((
    230221, 230222, 230223, 230224, 230225, 230255, 230256, 230257, 230323,
))


def get_questions(pks):
    """Build one export dict per question in ``pks``.

    For every matching ``Question`` the dict carries the question's own
    fields, its tags (legacy, "fresh" and tag-v3), author/doctor info,
    aggregated answer statistics, content keywords and the ranking score
    from ``Score.get_score``.

    Args:
        pks: iterable of question primary keys. It is not modified; IDs in
            ``_EXCLUDED_QUESTION_IDS`` are skipped.

    Returns:
        A list of dicts, or ``None`` when no question matches (callers may
        already rely on the ``None``, so it is kept).
    """
    results = []
    # Filter into a new list instead of pks.remove(): the caller's list is
    # left untouched and duplicate IDs are dropped as well.
    pks = [pk for pk in pks if pk not in _EXCLUDED_QUESTION_IDS]
    queryset = Question.objects.filter(id__in=pks)
    if not queryset:
        return
    tag_ids = list(QuestionTag.objects.filter(question__in=queryset).values_list('tag', flat=True))
    user_ids = list(queryset.values_list('user', flat=True))

    # Issue all three RPC calls before unwrapping any of them.
    users = rpc['api/user/get_fundamental_info_by_user_ids'](user_ids=user_ids)
    tags = rpc['api/tag/info_by_ids'](tag_ids=tag_ids)
    doctors = rpc['doctor/user/get_doctors'](user_ids=user_ids)
    user_dict = {str(user['id']): user for user in users.unwrap()}
    tag_dict = {str(tag['id']): tag for tag in tags.unwrap()}
    doctor_list = doctors.unwrap()['doctors']
    # Keyed by the doctor's user id ('user' or, failing that, 'user_id').
    doctor_dict = {str(doctor.get('user') or doctor.get('user_id')): doctor for doctor in doctor_list}

    for q in queryset:

        res = {
            'id': q.id,
            'create_time': tzlc(q.create_time),
            'create_time_epoch': to_epoch(tzlc(q.create_time)),
            'has_cover': bool(q.cover_url),
            'title': q.title,
            'content': q.content,
            'is_online': q.is_online,
            'is_recommend': q.is_recommend,
            'answers': [],
            'answer_likes_num': 0,
            'answer_views_num': 0,
            'answers_num': 0,
            'has_recommended_answer': False,
            'views_num': q.view_amount,
            'like_num': q.like_num,
            'content_type': q.content_type
        }
        # Best answer is optional: a failure here must not abort the export,
        # but it is logged instead of being swallowed silently.
        try:
            best_answer_ids = list(
                QuestionAnswer.objects.filter(question_id=q.id).values_list('answer_id', flat=True))
            if best_answer_ids:
                res['best_answer'] = int(best_answer_ids[0])
        except Exception:
            logger.exception('failed to load best answer, question id:{}'.format(q.id))

        # tag: keep only the tags the tag service returned info for
        q_tag_ids = [
            t for t in QuestionTag.objects.filter(question=q).values_list('tag', flat=True)
            if str(t) in tag_dict
        ]
        res['tags'] = [tag_dict[str(tag_id)]['name'] for tag_id in q_tag_ids]
        res['tag_ids'] = q_tag_ids
        # Left as an un-unwrapped RPC result here; resolved after the loop.
        res['closure_tag_ids'] = rpc['api/tag/closure_tags'](tag_ids=q_tag_ids) if q_tag_ids else []

        fresh_tag_result = rpc["api/agile_tag/tuple_new_tags"](old_tag_ids=q_tag_ids)
        fresh_tag_id_list = list()
        fresh_tag_name_list = list()
        for fresh_tag_id, fresh_tag_name in fresh_tag_result.unwrap():
            fresh_tag_id_list.append(fresh_tag_id)
            fresh_tag_name_list.append(fresh_tag_name)
        res["fresh_tags"] = fresh_tag_name_list
        res["fresh_closure_tag_ids"] = fresh_tag_id_list
        res["fresh_tag_ids"] = fresh_tag_id_list

        # user
        user = user_dict.get(str(q.user_id), {})
        is_doctor = str(q.user_id) in doctor_dict

        user_info = {
            'id': q.user_id,
            'is_doctor': is_doctor,
            'last_name': user.get('nickname', ''),
        }
        res['user'] = user_info
        if is_doctor:
            doctor_data = doctor_dict[str(q.user_id)]
            org_sink_data = doctor_data.get("org_sink")
            doctor = {
                'id': doctor_data.get("id"),
                'name': doctor_data.get("name"),
            }
            # Organisation penalty "sink" (demotion) window.
            if org_sink_data and org_sink_data[0]:
                sink = org_sink_data[0]
                res['org_sink_start_time'] = tzlc(
                    datetime.datetime.fromtimestamp(sink['org_sink_start_time']))
                # The end time used to be read from the *start* key
                # (copy-paste slip). Falls back to the start value if the
                # payload has no 'org_sink_end_time' -- TODO confirm schema.
                res['org_sink_end_time'] = tzlc(
                    datetime.datetime.fromtimestamp(
                        sink.get('org_sink_end_time', sink['org_sink_start_time'])))

            res['doctor'] = doctor

        for key in ('city_tag_id', 'city_province_tag_id', 'city_province_country_tag_id'):
            if key in user:
                user_info[key] = user[key]

        all_answer_ids = []
        all_smk = 0

        answers = q.answers.order_by('-update_time')
        last_update_time = q.update_time
        last_update_time_answer_reply = q.create_time
        for answer in answers:
            # Skip offline answers. The second clause only matters for the
            # excluded question IDs, which are already filtered out above.
            if answer.is_online == False or (
                    q.id in _EXCLUDED_QUESTION_IDS and answer.level > 3):
                continue
            if answer.is_recommend:
                res['has_recommended_answer'] = True

            all_answer_ids.append(answer.id)
            if answer.is_online:
                res['answers'].append({
                    'id': answer.id,
                    'level': answer.level,
                    'content': answer.content,
                    'is_online': answer.is_online,
                    'is_recommend': answer.is_recommend,
                    'create_time': tzlc(answer.create_time),
                })
                # Track the newest activity among answers and their replies.
                reply = AnswerReply.objects.filter(answer_id=answer.id).order_by('-create_time').first()
                if reply:
                    last_update_time_answer_reply = max(answer.create_time, last_update_time_answer_reply,
                                                        reply.create_time)
                else:
                    last_update_time_answer_reply = max(answer.create_time, last_update_time_answer_reply)

            res['answer_likes_num'] += answer.like_num
            res['answer_views_num'] += answer.view_amount
            res['answers_num'] += 1
        res["last_update_time_answer_reply"] = tzlc(last_update_time_answer_reply)
        # Sum the smart-rank score of every counted answer.
        for answer_id in all_answer_ids:
            score_row = ApiAnswerScore.objects.using(settings.DORIS_DB_NAME).filter(answer_id=answer_id).first()
            all_smk += score_row.new_score if score_row else 0.0

        res['all_smk'] = all_smk

        # Newest answer by creation time (offline ones included) overrides
        # the question's own update time.
        latest_answer = q.answers.order_by("-create_time").first()
        if latest_answer is not None:
            last_update_time = latest_answer.create_time

        res['last_update_time'] = tzlc(last_update_time)
        res['answer_last_update_time_epoch'] = to_epoch(res['last_update_time'])

        question_lastest_answer_time = last_update_time if res["answers_num"] else q.create_time
        res["question_lastest_answer_time"] = tzlc(question_lastest_answer_time)
        res["content_keyword"] = by_content_type_id_get_keywords(id=q.id, content_type="question")
        # res["content_star_keyword"] = get_content_star_keywords(id=q.id, content_type="question")
        # res["content_star_first_keyword"] = get_content_star_first_keyword(id=q.id, content_type="question")
        # res["title_keyword"] = get_content_title_keywords(id=q.id, content_type="question")

        # Miscellaneous fields
        res['in_whitelist'] = False
        res['in_multitopic'] = False
        res['question_type'] = q.question_type
        res['score'] = Score.get_score(q)
        res['operators_add_tags'] = get_tag_v3_operators_tags(content_id=q.id, content_type="question")

        (need_refresh_data, second_demands_list, second_solutions_list, second_positions_list,
         second_demands_ids_list,
         second_solutions_ids_list, second_positions_ids_list,
         first_demands_ids_list, first_solutions_ids_list, first_positions_ids_list, first_demands_list,
         first_solutions_list, first_positions_list,
         project_tags_list, project_tags_ids_list, first_classify_ids_list, first_classify_names_list,
         second_classify_ids_list, second_classify_names_list) = get_tagv3_analysis_info(content_id=res["id"],
                                                                                         content_type="question")
        if need_refresh_data:
            # These fields belong on the question dict. They used to be
            # written to `item`, a leftover int loop variable, which raised
            # TypeError/NameError.
            res["tags_v3"] = list(project_tags_list)
            res["first_demands"] = list(first_demands_list)
            res["second_demands"] = list(second_demands_list)
            res["first_solutions"] = list(first_solutions_list)
            res["second_solutions"] = list(second_solutions_list)
            res["positions"] = list(first_positions_list)
            res["second_positions"] = list(second_positions_list)
            res["tagv3_ids"] = list(project_tags_ids_list)
            res["first_demands_ids"] = list(first_demands_ids_list)
            res["second_demands_ids"] = list(second_demands_ids_list)
            res["first_solutions_ids"] = list(first_solutions_ids_list)
            res["second_solutions_ids"] = list(second_solutions_ids_list)
            res["first_positions_ids"] = list(first_positions_ids_list)
            res["second_positions_ids"] = list(second_positions_ids_list)
            res["first_classify_ids"] = list(first_classify_ids_list)
            res["first_classify_names"] = list(first_classify_names_list)
            res["second_classify_ids"] = list(second_classify_ids_list)
            res["second_classify_names"] = list(second_classify_names_list)

        results.append(res)

    # Resolve the closure-tag RPC results that were left pending in the loop.
    for result in results:
        if not isinstance(result['closure_tag_ids'], list):
            result['closure_tag_ids'] = [tag['id'] for tag in result['closure_tag_ids'].unwrap()]

    return results