1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
# -*- coding: utf-8 -*-
from django.db import models
from gm_types.doris import CARD_TYPE
import traceback
import logging
from django.conf import settings
from elasticsearch import Elasticsearch as Es
class TouTiaoTag(models.Model):
    """Mapping row for a source (Toutiao) tag and its relation bookkeeping."""

    class Meta:
        verbose_name = u'tag标签映射'
        app_label = u'api'

    # Source tag text; unique, so one row per distinct source tag.
    toutiao = models.CharField(u'源标签', max_length=40, unique=True)
    # Whether this tag is currently live/visible.
    is_online = models.BooleanField(u'是否在线', default=True)
    # Whether any relation rows have been attached (see ToutiaoRelation).
    is_related = models.BooleanField(u'是否关联', default=False)
    create_time = models.DateTimeField(u'创建时间', auto_now_add=True)
    # Denormalized count of related tags — presumably kept in sync by the
    # code that writes ToutiaoRelation rows; TODO confirm against callers.
    count = models.IntegerField('关联标签数量', default=0)
class ToutiaoRelation(models.Model):
    """Many-to-one link from a TouTiaoTag to an associated tag name."""

    class Meta:
        verbose_name = u'趣头条关系表'
        app_label = u'api'

    # Owning source tag; reverse accessor is `relationtags`.
    toutiaotag = models.ForeignKey(TouTiaoTag, verbose_name=u'头条', related_name='relationtags')
    # Related tag stored as plain text (nullable), not as a FK.
    tag = models.CharField(u'关联tag', max_length=40, null=True)
class ContentKeywordExtraction(models.Model):
    """Per-content extracted medical-beauty keywords, one row per (id, content_type).

    Keyword columns hold comma-separated strings; the accessor functions in
    this module split them on "," before returning.
    """

    class Meta:
        verbose_name = u'不同内容的医美关键词'
        app_label = u'doris'
        db_table = "content_keyword_extraction"

    # NOTE(review): this overrides Django's auto `id` field but does not set
    # primary_key=True, and a TextField PK is unusual — confirm against the
    # actual `content_keyword_extraction` table schema.
    id = models.TextField(verbose_name="卡片ID")
    # Comma-separated keyword list.
    keywords = models.TextField(verbose_name="关键词信息", default=None)
    # Comma-separated celebrity ("star") keyword list.
    star_keywords = models.TextField(verbose_name="明星关键词信息", default=None)
    # Comma-separated keywords extracted from the title.
    title_keywords = models.TextField(verbose_name="标题关键词信息", default=None)
    # Content kind discriminator used together with `id` for lookups.
    content_type = models.TextField(verbose_name="类型")
    # Last-update timestamp stored as text.
    time = models.TextField(verbose_name="更新时间")
def by_content_type_id_get_keywords(id, content_type):
    """Return the keyword list for one content item, or [] on miss/error.

    Looks up ContentKeywordExtraction by (id, content_type) and splits the
    comma-separated ``keywords`` column.

    :param id: content/card id; coerced to str for the lookup.
    :param content_type: content kind discriminator.
    :return: list of keyword strings (possibly empty). Never raises.
    """
    try:
        data = ContentKeywordExtraction.objects.filter(
            id=str(id), content_type=content_type
        ).values_list("keywords", flat=True).first()
        if data:
            return data.split(",")
        return []
    except Exception:
        # Was a bare ``except:`` — narrowed so SystemExit/KeyboardInterrupt
        # are no longer swallowed; DB/data errors still degrade to [].
        logging.error("catch exception,err_msg:%s" % traceback.format_exc())
        return []
def get_es(es_hosts_config=None):
    """Build an Elasticsearch client for the default ES7 cluster.

    :param es_hosts_config: optional host list; falls back to
        settings.ES7_HOSTS when falsy.
    :return: a configured ``Es`` client (sniffing disabled).
    """
    hosts = es_hosts_config or settings.ES7_HOSTS
    return Es(
        hosts=hosts,
        http_auth=(settings.HTTP_AUTH_NAME, settings.HTTP_AUTH_PWD),
        # Sniffing is disabled so the client only talks to the configured hosts.
        sniff_on_start=False,
        sniff_on_connection_fail=False,
    )
def get_service_es(es_hosts_config=None):
    """Build an Elasticsearch client for the service-index ES7 cluster.

    :param es_hosts_config: optional host list; falls back to
        settings.ES7_SERVICE_HOSTS when falsy.
    :return: a configured ``Es`` client (sniffing disabled).
    """
    hosts = es_hosts_config or settings.ES7_SERVICE_HOSTS
    return Es(
        hosts=hosts,
        http_auth=(settings.HTTP_AUTH_NAME, settings.HTTP_AUTH_PWD),
        # Sniffing is disabled so the client only talks to the configured hosts.
        sniff_on_start=False,
        sniff_on_connection_fail=False,
    )
def es_index_adapt(index_prefix, doc_type, rw=None):
    """Get the adapted index name.

    Joins ``index_prefix`` and ``doc_type`` with '-', optionally appending a
    read/write alias suffix.

    :param index_prefix: cluster-wide index prefix (e.g. settings.ES_INDEX_PREFIX).
    :param doc_type: document type / index family name.
    :param rw: None, 'read', or 'write'.
    :return: e.g. 'prefix-doc' or 'prefix-doc-read'.
    :raises ValueError: if ``rw`` is not one of the allowed values.
    """
    # Explicit raise instead of ``assert``: asserts are stripped under
    # ``python -O``, so invalid rw values would silently produce a bad index.
    if rw not in (None, 'read', 'write'):
        raise ValueError("rw must be None, 'read' or 'write', got %r" % (rw,))
    index = '-'.join((index_prefix, doc_type))
    if rw:
        index = '-'.join((index, rw))
    return index
def es_query(doc, body, offset, size, es=None):
    """Search the default cluster's read alias for *doc*.

    :param doc: doc type; also used to derive the index name.
    :param body: ES query body dict.
    :param offset: pagination offset (ES ``from_``).
    :param size: page size.
    :param es: optional pre-built client; a default one is created when None.
    :return: raw ES search response.
    """
    client = es if es is not None else get_es()
    read_index = es_index_adapt(
        index_prefix=settings.ES_INDEX_PREFIX,
        doc_type=doc,
        rw='read',
    )
    return client.search(
        index=read_index,
        doc_type=doc,
        timeout=settings.ES_SEARCH_TIMEOUT,
        body=body,
        from_=offset,
        size=size,
    )
def es_query_service(doc, body, offset, size, es=None):
    """Search the service cluster's read alias for *doc*.

    Same contract as ``es_query`` but defaults to the service-cluster client
    from ``get_service_es``.

    :param doc: doc type; also used to derive the index name.
    :param body: ES query body dict.
    :param offset: pagination offset (ES ``from_``).
    :param size: page size.
    :param es: optional pre-built client; a service-cluster one is created when None.
    :return: raw ES search response.
    """
    client = es if es is not None else get_service_es()
    read_index = es_index_adapt(
        index_prefix=settings.ES_INDEX_PREFIX,
        doc_type=doc,
        rw='read',
    )
    return client.search(
        index=read_index,
        doc_type=doc,
        timeout=settings.ES_SEARCH_TIMEOUT,
        body=body,
        from_=offset,
        size=size,
    )
def has_service(tag_list, content_keyword):
    """Return True iff at least one matching online service exists.

    A service matches when it is online, has a SKU whose sale window covers
    "now", and its closure tag ids or closure tag names intersect the given
    inputs (minimum_should_match=1).

    :param tag_list: tag ids matched against ``closure_tag_ids``.
    :param content_keyword: tag names matched against ``closure_tags``.
    :return: bool.
    """
    sku_window = {
        "nested": {
            "path": "sku_list",
            "query": {
                "bool": {
                    "must": [
                        {"range": {"sku_list.start_time": {"lte": "now"}}},
                        {"range": {"sku_list.end_time": {"gt": "now"}}},
                    ]
                }
            },
        }
    }
    query = {
        "query": {
            "bool": {
                "must": [{"term": {"is_online": True}}, sku_window],
                "should": [
                    {"terms": {"closure_tag_ids": tag_list}},
                    {"terms": {"closure_tags": content_keyword}},
                ],
                "minimum_should_match": 1,
            }
        }
    }
    res = es_query_service(doc="service", body=query, offset=0, size=1)
    if not res:
        return False
    return res['hits']['total']['value'] > 0
def get_content_star_keywords(id, content_type):
    """Return the star-keyword list for one content item, or [] on miss/error.

    Looks up ContentKeywordExtraction by (id, content_type) and splits the
    comma-separated ``star_keywords`` column.

    :param id: content/card id; coerced to str for the lookup.
    :param content_type: content kind discriminator.
    :return: list of keyword strings (possibly empty). Never raises.
    """
    try:
        data = ContentKeywordExtraction.objects.filter(
            id=str(id), content_type=content_type
        ).values_list("star_keywords", flat=True).first()
        if data:
            return data.split(",")
        return []
    except Exception:
        # Was a bare ``except:`` — narrowed so SystemExit/KeyboardInterrupt
        # are no longer swallowed; DB/data errors still degrade to [].
        logging.error("catch exception,err_msg:%s" % traceback.format_exc())
        return []
def get_content_star_first_keyword(id, content_type):
    """Return the first star keyword for the item, or "" when none exist.

    :param id: content/card id, forwarded to ``get_content_star_keywords``.
    :param content_type: content kind discriminator.
    :return: first keyword string, or the empty string.
    """
    keywords = get_content_star_keywords(id, content_type)
    return keywords[0] if keywords else ""
def get_content_title_keywords(id, content_type):
    """Return the title-keyword list for one content item, or [] on miss/error.

    Looks up ContentKeywordExtraction by (id, content_type) and splits the
    comma-separated ``title_keywords`` column.

    :param id: content/card id; coerced to str for the lookup.
    :param content_type: content kind discriminator.
    :return: list of keyword strings (possibly empty). Never raises.
    """
    try:
        data = ContentKeywordExtraction.objects.filter(
            id=str(id), content_type=content_type
        ).values_list("title_keywords", flat=True).first()
        if data:
            return data.split(",")
        return []
    except Exception:
        # Was a bare ``except:`` — narrowed so SystemExit/KeyboardInterrupt
        # are no longer swallowed; DB/data errors still degrade to [].
        logging.error("catch exception,err_msg:%s" % traceback.format_exc())
        return []