1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
# coding=utf-8
import datetime
from math import ceil
import logging
from qa.models.answer import ApiAnswerScore
from django.conf import settings
from data_sync.utils import to_epoch
from data_sync.utils import tzlc
from qa.models.answer import QuestionTag, Question, Answer, AnswerVote, AnswerReply, QuestionAnswer
from utils.rpc import get_rpc_invoker
from qa.models.toutiao import by_content_type_id_get_keywords, get_content_star_keywords, get_content_title_keywords, \
get_content_star_first_keyword
from tags.services.tag import (get_tagv3_analysis_info, get_tagv3_ids_by_tagv3_names,
get_first_demand_ids_by_name, get_second_demand_ids_by_name,
get_first_position_ids_by_name, get_second_position_ids_by_name,
get_first_solution_ids_by_name, get_second_solution_ids_by_name,
get_tag_v3_operators_tags)
# RPC invoker shared by this module; endpoints are looked up by path and
# called with keyword arguments, e.g. rpc['api/tag/info_by_ids'](tag_ids=...).
rpc = get_rpc_invoker()
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
def get_score(result):
    """Compute a combined ranking score for one question record.

    The score is the sum of two parts:

    * a question part: a weighted sum of answer-count, answer-like and view
      scores, divided by a weighted time score;
    * an answer part: a weighted sum of answer-like and answer-view scores.

    All weights are read from ``settings.QUESTION_SCORE_WEIGHT``.

    Args:
        result: mapping providing ``create_time_epoch``,
            ``answer_last_update_time_epoch``, ``answer_likes_num``,
            ``answers_num``, ``views_num`` and ``answer_views_num``.
            The two epoch values are compared against
            ``datetime.now().timestamp()``, i.e. they are in seconds.

    Returns:
        The question score plus the answer score (a float).

    Raises:
        KeyError: if ``result`` or the weight mapping lacks a required key.
        ZeroDivisionError: if the weighted time score evaluates to zero.
    """
    weights = dict(settings.QUESTION_SCORE_WEIGHT)
    now = datetime.datetime.now().timestamp()

    # Ages in whole days, rounded up.
    days_since_created = ceil((now - result['create_time_epoch']) / 86400)
    days_since_updated = ceil((now - result['answer_last_update_time_epoch']) / 86400)

    # A negative base with a fractional exponent yields a complex number in
    # Python 3, which would leak into the returned score.  That can only
    # happen when the last answer update predates the question itself, so
    # treat that case as "no gap".
    active_days = max(days_since_created - days_since_updated, 0)
    time_score = days_since_created - pow(active_days, 1.5) / 2

    # NOTE(review): max() makes 100 a lower bound for each component below,
    # not a cap -- confirm whether min() was intended.
    question_answer_likes_score = max(100, result['answer_likes_num'] / 4)
    answers_num_score = max(100, result['answers_num'] * 2)
    question_view_score = max(100, ceil(result['views_num'] / 20))

    question_score = sum([
        answers_num_score * weights['answers_num_score_weight'],
        question_answer_likes_score * weights['question_answer_likes_score_weight'],
        question_view_score * weights['question_view_score_weight'],
    ]) / (time_score * weights['time_score_weight'])

    answer_likes_score = max(100, result['answer_likes_num'] / 2)
    answer_views_score = max(100, ceil(result['answer_views_num'] / 20))

    answer_score = sum([
        answer_likes_score * weights['answer_likes_score_weight'],
        answer_views_score * weights['answer_views_score_weight'],
    ])

    return question_score + answer_score
class Score(object):
    """Heuristic score for a question, built from its recommended answer,
    its answer count and the time elapsed since creation."""

    # (exclusive upper bound on answer level, score); levels >= 5 score 100.
    _CONTENT_TIERS = ((2, 0), (3, 5), (4, 10), (5, 70))
    # (inclusive upper bound on a like/reply count, score); above 100 scores 100.
    _ENGAGEMENT_TIERS = ((5, 10), (20, 20), (50, 30), (70, 60), (100, 70))

    @classmethod
    def get_score(cls, question):
        """Return the non-negative score of ``question``.

        The score combines (weights 0.8 / 0.2) the score of the first
        recommended answer, if any, with the answer-count score, then
        subtracts a penalty of 0.03 per hour since the question was created.
        Negative intermediate or final values are logged and clamped to 0.

        Args:
            question: object exposing ``id``, ``create_time``, ``answer_num``
                and an ``answers`` manager supporting
                ``filter(is_recommend=True).first()``.

        Returns:
            The score, never below 0.
        """
        now = datetime.datetime.now()
        # total_seconds() counts whole days as well; timedelta.seconds only
        # holds the sub-day remainder, which made the decay wrap every 24h.
        question_age_hours = (now - question.create_time).total_seconds() / 3600

        answer_score = 0
        rec = question.answers.filter(is_recommend=True).first()
        if rec is not None:
            vote_num = rec.answervote_set.filter(is_fake=False).count()
            content_score = cls.get_answer_content_score(rec.level)
            social_score = cls.get_social_score(vote_num, rec.replys.count())
            answer_age_hours = (now - rec.create_time).total_seconds() / 3600
            time_score = (question_age_hours * 0.03 * 0.7
                          + answer_age_hours * 0.06 * 1.5)
            answer_score = 0.8 * content_score + 0.2 * social_score - time_score
            if answer_score < 0:
                logger.warning('answer score < 0, question id:{}, score:{}'.format(question.id, answer_score))
            answer_score = max(0, answer_score)

        answer_count_score = cls.get_answer_count_score(question.answer_num)
        question_time_score = question_age_hours * 0.03
        total = answer_score * 0.8 + answer_count_score * 0.2 - question_time_score
        if total < 0:
            logger.warning('question score < 0, question id:{}, score:{}'.format(question.id, total))
        return max(0, total)

    @staticmethod
    def get_answer_content_score(level):
        """Map an answer level to a content score: <2 -> 0, <3 -> 5,
        <4 -> 10, <5 -> 70, otherwise 100."""
        for upper_bound, score in Score._CONTENT_TIERS:
            if level < upper_bound:
                return score
        return 100

    @staticmethod
    def get_social_score(likes_num, reply_num):
        """Blend the like score (weight 0.4) and reply score (weight 0.6)."""
        likes_score = Score.get_likes_score(likes_num)
        reply_score = Score.get_reply_score(reply_num)
        return 0.4 * likes_score + 0.6 * reply_score

    @staticmethod
    def _engagement_score(count):
        """Shared tier lookup for like and reply counts: <=5 -> 10,
        <=20 -> 20, <=50 -> 30, <=70 -> 60, <=100 -> 70, otherwise 100."""
        for upper_bound, score in Score._ENGAGEMENT_TIERS:
            if count <= upper_bound:
                return score
        return 100

    @staticmethod
    def get_likes_score(likes_num):
        """Return the tiered score for a number of likes."""
        return Score._engagement_score(likes_num)

    @staticmethod
    def get_reply_score(reply_num):
        """Return the tiered score for a number of replies."""
        return Score._engagement_score(reply_num)

    @staticmethod
    def get_answer_count_score(answer_count):
        """Map an answer count to a score: <=2 -> 30, <6 -> 50, otherwise 70."""
        if answer_count <= 2:
            return 30
        if answer_count < 6:
            return 50
        return 70
# Question ids that are temporarily left out of the result set.
_EXCLUDED_QUESTION_IDS = frozenset((
    230221, 230222, 230223, 230224, 230225, 230255, 230256, 230257, 230323,
))


def _tagv3_fields(content_id):
    """Return the tag-v3 analysis fields for a question as a dict.

    Returns an empty dict when ``get_tagv3_analysis_info`` reports that no
    refresh is needed.
    """
    (need_refresh_data, second_demands_list, second_solutions_list, second_positions_list,
     second_demands_ids_list,
     second_solutions_ids_list, second_positions_ids_list,
     first_demands_ids_list, first_solutions_ids_list, first_positions_ids_list, first_demands_list,
     first_solutions_list, first_positions_list,
     project_tags_list, project_tags_ids_list, first_classify_ids_list, first_classify_names_list,
     second_classify_ids_list, second_classify_names_list) = get_tagv3_analysis_info(
        content_id=content_id, content_type="question")
    if not need_refresh_data:
        return {}
    return {
        "tags_v3": list(project_tags_list),
        "first_demands": list(first_demands_list),
        "second_demands": list(second_demands_list),
        "first_solutions": list(first_solutions_list),
        "second_solutions": list(second_solutions_list),
        "positions": list(first_positions_list),
        "second_positions": list(second_positions_list),
        "tagv3_ids": list(project_tags_ids_list),
        "first_demands_ids": list(first_demands_ids_list),
        "second_demands_ids": list(second_demands_ids_list),
        "first_solutions_ids": list(first_solutions_ids_list),
        "second_solutions_ids": list(second_solutions_ids_list),
        "first_positions_ids": list(first_positions_ids_list),
        "second_positions_ids": list(second_positions_ids_list),
        "first_classify_ids": list(first_classify_ids_list),
        "first_classify_names": list(first_classify_names_list),
        "second_classify_ids": list(second_classify_ids_list),
        "second_classify_names": list(second_classify_names_list),
    }


def get_questions(pks):
    """Build one dict per question for the given primary keys.

    Each dict carries the question's own fields plus tag, user/doctor,
    answer statistics, keyword, score and tag-v3 data gathered from the
    database and from RPC services.

    Args:
        pks: iterable of question ids.  Ids listed in
            ``_EXCLUDED_QUESTION_IDS`` are ignored.  The argument itself is
            not modified.

    Returns:
        A list of dicts, or ``None`` when no question matches.
    """
    wanted_pks = [pk for pk in pks if pk not in _EXCLUDED_QUESTION_IDS]
    queryset = Question.objects.filter(id__in=wanted_pks)
    if not queryset:
        return None

    tag_ids = list(QuestionTag.objects.filter(question__in=queryset).values_list('tag', flat=True))
    user_ids = list(queryset.values_list('user', flat=True))

    # Bulk lookups for every question in the batch, keyed by stringified id.
    users = rpc['api/user/get_fundamental_info_by_user_ids'](user_ids=user_ids)
    tags = rpc['api/tag/info_by_ids'](tag_ids=tag_ids)
    doctors = rpc['doctor/user/get_doctors'](user_ids=user_ids)

    user_dict = {str(user['id']): user for user in users.unwrap()}
    tag_dict = {str(tag['id']): tag for tag in tags.unwrap()}
    doctor_dict = {
        str(doctor.get('user') or doctor.get('user_id')): doctor
        for doctor in doctors.unwrap()['doctors']
    }

    results = []
    for q in queryset:
        res = {
            'id': q.id,
            'create_time': tzlc(q.create_time),
            'create_time_epoch': to_epoch(tzlc(q.create_time)),
            'has_cover': True if q.cover_url else False,
            'title': q.title,
            'content': q.content,
            'is_online': q.is_online,
            'is_recommend': q.is_recommend,
            'answers': [],
            'answer_likes_num': 0,
            'answer_views_num': 0,
            'answers_num': 0,
            'has_recommended_answer': False,
            'views_num': q.view_amount,
            'like_num': q.like_num,
            'content_type': q.content_type
        }

        # Best answer is optional: a failure here must not drop the question.
        try:
            answer_id = list(QuestionAnswer.objects.filter(question_id=q.id).values_list('answer_id', flat=True))
            if answer_id:
                res['best_answer'] = int(answer_id[0])
        except Exception:
            logger.exception('failed to load best answer, question id:%s', q.id)

        # tag
        question_tag_ids = [
            tag_id
            for tag_id in QuestionTag.objects.filter(question=q).values_list('tag', flat=True)
            if str(tag_id) in tag_dict
        ]
        res['tags'] = [tag_dict[str(tag_id)]['name'] for tag_id in question_tag_ids]
        res['tag_ids'] = question_tag_ids
        # Stored as the raw RPC result here; unwrapped after the main loop.
        res['closure_tag_ids'] = rpc['api/tag/closure_tags'](tag_ids=question_tag_ids) if question_tag_ids else []

        fresh_tag_result = rpc["api/agile_tag/tuple_new_tags"](old_tag_ids=question_tag_ids)
        fresh_tag_id_list = []
        fresh_tag_name_list = []
        for fresh_tag_id, fresh_tag_name in fresh_tag_result.unwrap():
            fresh_tag_id_list.append(fresh_tag_id)
            fresh_tag_name_list.append(fresh_tag_name)
        res["fresh_tags"] = fresh_tag_name_list
        res["fresh_closure_tag_ids"] = fresh_tag_id_list
        res["fresh_tag_ids"] = fresh_tag_id_list

        # user
        user_key = str(q.user_id)
        user = user_dict.get(user_key, {})
        is_doctor = user_key in doctor_dict
        user_info = {
            'id': q.user_id,
            'is_doctor': is_doctor,
            'last_name': user.get('nickname', ''),
        }
        res['user'] = user_info

        if is_doctor:
            doctor_data = doctor_dict[user_key]
            org_sink_data = doctor_data.get("org_sink")
            # Organisation penalty ("sink") window.
            if org_sink_data and org_sink_data[0]:
                sink = org_sink_data[0]
                start_ts = sink['org_sink_start_time']
                # NOTE(review): the end time used to be filled from the start
                # timestamp.  'org_sink_end_time' is assumed to be the right
                # key; fall back to the start timestamp when it is absent.
                end_ts = sink.get('org_sink_end_time')
                if end_ts is None:
                    end_ts = start_ts
                res['org_sink_start_time'] = tzlc(datetime.datetime.fromtimestamp(start_ts))
                res['org_sink_end_time'] = tzlc(datetime.datetime.fromtimestamp(end_ts))
            res['doctor'] = {
                'id': doctor_data.get("id"),
                'name': doctor_data.get("name"),
            }

        for key in ('city_tag_id', 'city_province_tag_id', 'city_province_country_tag_id'):
            if key in user:
                user_info[key] = user[key]

        # answers
        all_answer_ids = []
        last_update_time = q.update_time
        last_update_time_answer_reply = q.create_time
        for answer in q.answers.order_by('-update_time'):
            if answer.is_online == False or (
                    res['id'] in _EXCLUDED_QUESTION_IDS and answer.level > 3):
                continue

            if answer.is_recommend:
                res['has_recommended_answer'] = True
            all_answer_ids.append(answer.id)

            if answer.is_online:
                res['answers'].append({
                    'id': answer.id,
                    'level': answer.level,
                    'content': answer.content,
                    'is_online': answer.is_online,
                    'is_recommend': answer.is_recommend,
                    'create_time': tzlc(answer.create_time),
                })

            # Track the newest activity among answers and their latest reply.
            activity_times = [answer.create_time, last_update_time_answer_reply]
            reply = AnswerReply.objects.filter(answer_id=answer.id).order_by('-create_time').first()
            if reply:
                activity_times.append(reply.create_time)
            last_update_time_answer_reply = max(activity_times)

            res['answer_likes_num'] += answer.like_num
            res['answer_views_num'] += answer.view_amount
            res['answers_num'] += 1

        res["last_update_time_answer_reply"] = tzlc(last_update_time_answer_reply)

        # Sum the stored score of every counted answer.
        all_smk = 0
        for counted_answer_id in all_answer_ids:
            score_row = ApiAnswerScore.objects.using(settings.DORIS_DB_NAME).filter(
                answer_id=counted_answer_id).first()
            all_smk += score_row.new_score if score_row else 0.0
        res['all_smk'] = all_smk

        # The most recently created answer, if any, overrides q.update_time.
        latest_answer = q.answers.order_by("-create_time").first()
        if latest_answer is not None:
            last_update_time = latest_answer.create_time

        res['last_update_time'] = tzlc(last_update_time)
        res['answer_last_update_time_epoch'] = to_epoch(res['last_update_time'])

        question_lastest_answer_time = last_update_time if res["answers_num"] else q.create_time
        res["question_lastest_answer_time"] = tzlc(question_lastest_answer_time)

        res["content_keyword"] = by_content_type_id_get_keywords(id=q.id, content_type="question")

        # Miscellaneous fields.
        res['in_whitelist'] = False
        res['in_multitopic'] = False
        res['question_type'] = q.question_type
        res['score'] = Score.get_score(q)
        res['operators_add_tags'] = get_tag_v3_operators_tags(content_id=q.id, content_type="question")

        # Tag-v3 fields belong on this question's dict.
        res.update(_tagv3_fields(res["id"]))

        results.append(res)

    # Resolve the closure-tag RPC results that were stored un-unwrapped above.
    for result in results:
        if not isinstance(result['closure_tag_ids'], list):
            result['closure_tag_ids'] = [tag['id'] for tag in result['closure_tag_ids'].unwrap()]
    return results