Commit bdb93389 authored by lixiaofang

update es

parent 45e3a766
......@@ -2,7 +2,8 @@
<project version="4">
<component name="ChangeListManager">
<list default="true" id="dbf86a46-510b-4304-905e-cf96c87ff27d" name="Default Changelist" comment="">
<change beforePath="$PROJECT_DIR$/.idea/workspace.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/workspace.xml" afterDir="false" />
<change beforePath="$PROJECT_DIR$/libs/es.py" beforeDir="false" afterPath="$PROJECT_DIR$/libs/es.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/search/utils/user.py" beforeDir="false" afterPath="$PROJECT_DIR$/search/utils/user.py" afterDir="false" />
</list>
<option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" />
<option name="SHOW_DIALOG" value="false" />
......@@ -24,7 +25,7 @@
<usages-collector id="statistics.file.extensions.open">
<counts>
<entry key="json" value="11" />
<entry key="py" value="74" />
<entry key="py" value="75" />
<entry key="txt" value="1" />
<entry key="xml" value="1" />
</counts>
......@@ -33,7 +34,7 @@
<counts>
<entry key="JSON" value="11" />
<entry key="PLAIN_TEXT" value="1" />
<entry key="Python" value="74" />
<entry key="Python" value="75" />
<entry key="XML" value="1" />
</counts>
</usages-collector>
......@@ -41,14 +42,14 @@
<counts>
<entry key="dummy" value="9" />
<entry key="json" value="100" />
<entry key="py" value="545" />
<entry key="py" value="546" />
</counts>
</usages-collector>
<usages-collector id="statistics.file.types.edit">
<counts>
<entry key="JSON" value="100" />
<entry key="PLAIN_TEXT" value="9" />
<entry key="Python" value="545" />
<entry key="Python" value="546" />
</counts>
</usages-collector>
</session>
......@@ -67,16 +68,24 @@
<file pinned="false" current-in-tab="true">
<entry file="file://$PROJECT_DIR$/libs/es.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="-135">
<caret line="174" column="13" lean-forward="true" selection-start-line="174" selection-end-line="175" />
<state relative-caret-position="383">
<caret line="184" column="10" selection-start-line="184" selection-end-line="185" />
<folding>
<marker date="1548062012000" expanded="true" signature="5756:5757" ph="..." />
<marker date="1548062012000" expanded="true" signature="5985:5986" ph="..." />
<element signature="e#46#55#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
</file>
<file pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/search/utils/user.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="180">
<caret line="267" column="42" selection-start-line="267" selection-start-column="42" selection-end-line="267" selection-end-column="42" />
</state>
</provider>
</entry>
</file>
<file pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/search/views/contrast_similar.py">
<provider selected="true" editor-type-id="text-editor">
......@@ -166,13 +175,14 @@
<option value="$PROJECT_DIR$/trans2es/models/contrast_similar.py" />
<option value="$PROJECT_DIR$/physical/settings.py" />
<option value="$PROJECT_DIR$/search/views/contrast_similar.py" />
<option value="$PROJECT_DIR$/search/utils/user.py" />
<option value="$PROJECT_DIR$/libs/es.py" />
</list>
</option>
</component>
<component name="ProjectFrameBounds">
<option name="x" value="-13" />
<option name="y" value="23" />
<option name="x" value="-128" />
<option name="y" value="38" />
<option name="width" value="1280" />
<option name="height" value="736" />
</component>
......@@ -216,6 +226,23 @@
<item name="physical" type="462c0819:PsiDirectoryNode" />
<item name="physical" type="462c0819:PsiDirectoryNode" />
</path>
<path>
<item name="physical" type="b2602c69:ProjectViewProjectNode" />
<item name="physical" type="462c0819:PsiDirectoryNode" />
<item name="search" type="462c0819:PsiDirectoryNode" />
</path>
<path>
<item name="physical" type="b2602c69:ProjectViewProjectNode" />
<item name="physical" type="462c0819:PsiDirectoryNode" />
<item name="search" type="462c0819:PsiDirectoryNode" />
<item name="utils" type="462c0819:PsiDirectoryNode" />
</path>
<path>
<item name="physical" type="b2602c69:ProjectViewProjectNode" />
<item name="physical" type="462c0819:PsiDirectoryNode" />
<item name="search" type="462c0819:PsiDirectoryNode" />
<item name="views" type="462c0819:PsiDirectoryNode" />
</path>
<path>
<item name="physical" type="b2602c69:ProjectViewProjectNode" />
<item name="physical" type="462c0819:PsiDirectoryNode" />
......@@ -273,7 +300,7 @@
<servers />
</component>
<component name="ToolWindowManager">
<frame x="-13" y="23" width="1280" height="736" extended-state="0" />
<frame x="-128" y="38" width="1280" height="736" extended-state="0" />
<editor active="true" />
<layout>
<window_info active="true" content_ui="combo" id="Project" order="0" visible="true" weight="0.28911835" />
......@@ -289,7 +316,7 @@
<window_info anchor="bottom" id="TODO" order="6" />
<window_info anchor="bottom" id="Version Control" order="7" weight="0.017770598" />
<window_info anchor="bottom" id="DB Execution Console" order="8" />
<window_info anchor="bottom" id="Terminal" order="9" visible="true" weight="0.273752" />
<window_info anchor="bottom" id="Terminal" order="9" visible="true" weight="0.104669884" />
<window_info anchor="bottom" id="Event Log" order="10" side_tool="true" />
<window_info anchor="bottom" id="Python Console" order="11" />
<window_info anchor="right" id="Commander" internal_type="SLIDING" order="0" type="SLIDING" weight="0.4" />
......@@ -550,13 +577,19 @@
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/search/utils/user.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="180">
<caret line="267" column="42" selection-start-line="267" selection-start-column="42" selection-end-line="267" selection-end-column="42" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/libs/es.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="-135">
<caret line="174" column="13" lean-forward="true" selection-start-line="174" selection-end-line="175" />
<state relative-caret-position="383">
<caret line="184" column="10" selection-start-line="184" selection-end-line="185" />
<folding>
<marker date="1548062012000" expanded="true" signature="5756:5757" ph="..." />
<marker date="1548062012000" expanded="true" signature="5985:5986" ph="..." />
<element signature="e#46#55#0" expanded="true" />
</folding>
</state>
</provider>
......
......@@ -17,7 +17,6 @@ class ESPerform(object):
cli_info_list = settings.ES_INFO_LIST
index_prefix = settings.ES_INDEX_PREFIX
@classmethod
def get_cli(cls):
try:
......@@ -28,7 +27,7 @@ class ESPerform(object):
return None
@classmethod
def get_official_index_name(cls,sub_index_name,index_flag=None):
def get_official_index_name(cls, sub_index_name, index_flag=None):
"""
:remark:get official es index name
:param sub_index_name:
......@@ -36,7 +35,7 @@ class ESPerform(object):
:return:
"""
try:
assert (index_flag in [None,"read","write"])
assert (index_flag in [None, "read", "write"])
official_index_name = cls.index_prefix + "-" + sub_index_name
if index_flag:
......@@ -48,11 +47,11 @@ class ESPerform(object):
return None
@classmethod
def __load_mapping(cls,doc_type):
def __load_mapping(cls, doc_type):
try:
mapping_file_path = os.path.join(
os.path.dirname(__file__),
'..', 'trans2es','mapping', '%s.json' % (doc_type,))
'..', 'trans2es', 'mapping', '%s.json' % (doc_type,))
mapping = ''
with open(mapping_file_path, 'r') as f:
for line in f:
......@@ -65,7 +64,7 @@ class ESPerform(object):
return None
@classmethod
def create_index(cls,es_cli,sub_index_name):
def create_index(cls, es_cli, sub_index_name):
"""
:remark: create es index,alias index
:param sub_index_name:
......@@ -78,11 +77,11 @@ class ESPerform(object):
index_exist = es_cli.indices.exists(official_index_name)
if not index_exist:
es_cli.indices.create(official_index_name)
read_alias_name = cls.get_official_index_name(sub_index_name,"read")
es_cli.indices.put_alias(official_index_name,read_alias_name)
read_alias_name = cls.get_official_index_name(sub_index_name, "read")
es_cli.indices.put_alias(official_index_name, read_alias_name)
write_alias_name = cls.get_official_index_name(sub_index_name,"write")
es_cli.indices.put_alias(official_index_name,write_alias_name)
write_alias_name = cls.get_official_index_name(sub_index_name, "write")
es_cli.indices.put_alias(official_index_name, write_alias_name)
return True
except:
......@@ -90,7 +89,7 @@ class ESPerform(object):
return False
@classmethod
def put_index_mapping(cls,es_cli,sub_index_name,mapping_type="_doc"):
def put_index_mapping(cls, es_cli, sub_index_name, mapping_type="_doc"):
"""
:remark: put index mapping
:param es_cli:
......@@ -101,13 +100,13 @@ class ESPerform(object):
try:
assert (es_cli is not None)
write_alias_name = cls.get_official_index_name(sub_index_name,"write")
write_alias_name = cls.get_official_index_name(sub_index_name, "write")
index_exist = es_cli.indices.exists(write_alias_name)
if not index_exist:
return False
mapping_dict = cls.__load_mapping(sub_index_name)
es_cli.indices.put_mapping(index=write_alias_name,body=mapping_dict,doc_type=mapping_type)
es_cli.indices.put_mapping(index=write_alias_name, body=mapping_dict, doc_type=mapping_type)
return True
except:
......@@ -115,7 +114,7 @@ class ESPerform(object):
return False
@classmethod
def put_indices_template(cls,es_cli,template_file_name, template_name):
def put_indices_template(cls, es_cli, template_file_name, template_name):
"""
:remark put index template
:param es_cli:
......@@ -127,7 +126,7 @@ class ESPerform(object):
assert (es_cli is not None)
mapping_dict = cls.__load_mapping(template_file_name)
es_cli.indices.put_template(name=template_name,body=mapping_dict)
es_cli.indices.put_template(name=template_name, body=mapping_dict)
return True
except:
......@@ -135,7 +134,7 @@ class ESPerform(object):
return False
@classmethod
def es_helpers_bulk(cls,es_cli,data_list,sub_index_name,auto_create_index=False,doc_type="_doc"):
def es_helpers_bulk(cls, es_cli, data_list, sub_index_name, auto_create_index=False, doc_type="_doc"):
try:
assert (es_cli is not None)
......@@ -146,8 +145,8 @@ class ESPerform(object):
logging.error("index:%s is not existing,bulk data error!" % official_index_name)
return False
else:
cls.create_index(es_cli,sub_index_name)
cls.put_index_mapping(es_cli,sub_index_name)
cls.create_index(es_cli, sub_index_name)
cls.put_index_mapping(es_cli, sub_index_name)
bulk_actions = []
for data in data_list:
......@@ -158,7 +157,7 @@ class ESPerform(object):
'_id': data['id'],
'_source': data,
})
elasticsearch.helpers.bulk(es_cli,bulk_actions)
elasticsearch.helpers.bulk(es_cli, bulk_actions)
return True
except:
......@@ -166,34 +165,43 @@ class ESPerform(object):
return False
@classmethod
def get_search_results(cls, es_cli,sub_index_name,query_body,offset=0,size=10,
auto_create_index=False,doc_type="_doc",aggregations_query=False,is_suggest_request=False):
def get_search_results(cls, es_cli, sub_index_name, query_body, offset=0, size=10,
auto_create_index=False, doc_type="_doc", aggregations_query=False, is_suggest_request=False,
batch_search=False):
try:
assert (es_cli is not None)
official_index_name = cls.get_official_index_name(sub_index_name,"read")
official_index_name = cls.get_official_index_name(sub_index_name, "read")
index_exists = es_cli.indices.exists(official_index_name)
if not index_exists:
if not auto_create_index:
logging.error("index:%s is not existing,get_search_results error!" % official_index_name)
return None
else:
cls.create_index(es_cli,sub_index_name)
cls.put_index_mapping(es_cli,sub_index_name)
cls.create_index(es_cli, sub_index_name)
cls.put_index_mapping(es_cli, sub_index_name)
logging.info("duan add,query_body:%s" % str(query_body).encode("utf-8"))
res = es_cli.search(index=official_index_name,doc_type=doc_type,body=query_body,from_=offset,size=size)
if is_suggest_request:
return res
if not batch_search:
res = es_cli.search(index=official_index_name, doc_type=doc_type, body=query_body, from_=offset,
size=size)
if is_suggest_request:
return res
else:
result_dict = {
"total_count": res["hits"]["total"],
"hits": res["hits"]["hits"]
}
if aggregations_query:
result_dict["aggregations"] = res["aggregations"]
return result_dict
else:
result_dict = {
"total_count":res["hits"]["total"],
"hits":res["hits"]["hits"]
}
if aggregations_query:
result_dict["aggregations"] = res["aggregations"]
return result_dict
res = es_cli.msearch(body=query_body, index=official_index_name, doc_type=doc_type)
logging.info("duan add,msearch res:%s" % str(res))
return res
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return {"total_count":0,"hits":[]}
return {"total_count": 0, "hits": []}
......@@ -11,7 +11,7 @@ from libs.es import ESPerform
class UserUtils(object):
@classmethod
def get_batch_attention_user_list(cls,user_id_list,self_user_id):
def get_batch_attention_user_list(cls, user_id_list, self_user_id):
"""
:remark 批量用户 关注的 用户列表
:param user_id_list:
......@@ -23,12 +23,12 @@ class UserUtils(object):
q = dict()
q["query"] = {
"terms":{
"user_id":user_id_list
"terms": {
"user_id": user_id_list
}
}
q["_source"] = {
"include":["attention_user_id_list","user_id"]
"include": ["attention_user_id_list", "user_id"]
}
result_dict = ESPerform.get_search_results(ESPerform.get_cli(), "user", q, offset=0, size=len(user_id_list))
......@@ -42,7 +42,8 @@ class UserUtils(object):
user_id = hit_item["_source"]["user_id"]
if user_id == self_user_id:
self_attention_user_id_list = [item["user_id"] for item in hit_item["_source"]["attention_user_id_list"]]
self_attention_user_id_list = [item["user_id"] for item in
hit_item["_source"]["attention_user_id_list"]]
else:
attention_user_id_list = [item["user_id"] for item in hit_item["_source"]["attention_user_id_list"]]
......@@ -50,14 +51,13 @@ class UserUtils(object):
attention_user_dict_list.append(attention_user_dict)
ret_attention_user_id_list.append(user_id)
return [self_attention_user_id_list,attention_user_dict_list,ret_attention_user_id_list]
return [self_attention_user_id_list, attention_user_dict_list, ret_attention_user_id_list]
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return ([],[],[])
return ([], [], [])
@classmethod
def get_attention_user_list(cls,user_id_list,self_user_id):
def get_attention_user_list(cls, user_id_list, self_user_id):
"""
:remark 获取指定用户列表 关注的 用户列表
:param user_id:
......@@ -66,12 +66,12 @@ class UserUtils(object):
try:
q = dict()
q["query"] = {
"terms":{
"user_id":user_id_list
"terms": {
"user_id": user_id_list
}
}
q["_source"] = {
"include":["attention_user_id_list","user_id"]
"include": ["attention_user_id_list", "user_id"]
}
result_dict = ESPerform.get_search_results(ESPerform.get_cli(), "user", q, offset=0, size=len(user_id_list))
......@@ -80,23 +80,25 @@ class UserUtils(object):
recursion_attention_user_id_list = []
for hit_item in result_dict["hits"]:
if hit_item["_source"]["user_id"] == self_user_id:
self_attention_user_id_list = [item["user_id"] for item in hit_item["_source"]["attention_user_id_list"]]
self_attention_user_id_list = [item["user_id"] for item in
hit_item["_source"]["attention_user_id_list"]]
else:
recursion_attention_user_id_list = [item["user_id"] for item in hit_item["_source"]["attention_user_id_list"]]
recursion_attention_user_id_list = [item["user_id"] for item in
hit_item["_source"]["attention_user_id_list"]]
return (self_attention_user_id_list,recursion_attention_user_id_list)
return (self_attention_user_id_list, recursion_attention_user_id_list)
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return ([],[])
return ([], [])
@classmethod
def ___get_should_term_list(cls,ori_list,field_name="tag_list"):
def ___get_should_term_list(cls, ori_list, field_name="tag_list"):
try:
should_term_list = list()
for term_id in ori_list:
term_dict = {
"term":{
field_name:{"value":term_id}
"term": {
field_name: {"value": term_id}
}
}
should_term_list.append(term_dict)
......@@ -106,7 +108,7 @@ class UserUtils(object):
return []
@classmethod
def get_recommend_user_list(cls,self_attention_user_id_list,recursion_attention_user_id_list,offset,size):
def get_recommend_user_list(cls, self_attention_user_id_list, recursion_attention_user_id_list, offset, size):
"""
:remark 获取推荐用户列表
:param attention_user_id_list:
......@@ -134,12 +136,12 @@ class UserUtils(object):
if len(recursion_attention_user_id_list) > 0:
functions_list.append(
{
"filter":{
"bool":{
"should":{"terms":{"user_id":recursion_attention_user_id_list}}
"filter": {
"bool": {
"should": {"terms": {"user_id": recursion_attention_user_id_list}}
}
},
"weight":10
"weight": 10
}
)
......@@ -152,9 +154,9 @@ class UserUtils(object):
{"term": {"is_deleted": False}},
{"term": {"is_shadow": False}}
],
"must_not":{
"terms":{
"user_id":self_attention_user_id_list
"must_not": {
"terms": {
"user_id": self_attention_user_id_list
}
}
}
......@@ -165,7 +167,7 @@ class UserUtils(object):
}
q["query"]["function_score"] = query_function_score
q["_source"] = {
"include":["user_id"]
"include": ["user_id"]
}
result_dict = ESPerform.get_search_results(ESPerform.get_cli(), sub_index_name="user", query_body=q,
offset=offset, size=size)
......@@ -179,9 +181,9 @@ class UserUtils(object):
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return []
@classmethod
def get_batch_recommend_user_dict(cls,need_filter_attention_user_id_list,attention_user_id_list,attention_user_dict_list,self_user_id,offset,size):
def get_batch_recommend_user_dict(cls, need_filter_attention_user_id_list, attention_user_id_list,
attention_user_dict_list, self_user_id, offset, size):
"""
:remark 获取批量推荐用户
:param need_filter_attention_user_id_list:
......@@ -220,12 +222,13 @@ class UserUtils(object):
if len(interesting_user_item_dict[interesting_user_id]) > 0:
functions_list.append(
{
"filter":{
"bool":{
"should":{"terms":{"user_id":interesting_user_item_dict[interesting_user_id]}}
"filter": {
"bool": {
"should": {
"terms": {"user_id": interesting_user_item_dict[interesting_user_id]}}
}
},
"weight":10
"weight": 10
}
)
......@@ -238,9 +241,9 @@ class UserUtils(object):
{"term": {"is_deleted": False}},
{"term": {"is_shadow": False}}
],
"must_not":{
"terms":{
"user_id":filter_user_id_list
"must_not": {
"terms": {
"user_id": filter_user_id_list
}
}
}
......@@ -251,18 +254,19 @@ class UserUtils(object):
}
q["query"]["function_score"] = query_function_score
q["_source"] = {
"include":["user_id"]
"include": ["user_id"]
}
batch_query_list.append(q)
index_name = ESPerform.get_official_index_name("user","read")
index_name = ESPerform.get_official_index_name("user", "read")
search_header_dict = {'index': index_name, 'type': "_doc"}
query_body = ""
for query_item in batch_query_list:
query_body += "{}\n{}\n".format(json.dumps(search_header_dict),json.dumps(query_item))
query_body += "{}\n{}\n".format(json.dumps(search_header_dict), json.dumps(query_item))
result_dict = ESPerform.get_search_results(ESPerform.get_cli(), sub_index_name="user", query_body=query_body,
result_dict = ESPerform.get_search_results(ESPerform.get_cli(), sub_index_name="user",
query_body=query_body,
batch_search=True)
ret_dict = dict()
user_index = 0
......@@ -271,11 +275,11 @@ class UserUtils(object):
for item in res_item["hits"]["hits"]:
recommend_user_list.append(item["_source"]["user_id"])
logging.info("duan add,attention_user_id_list:%s,user_index:%d" % (str(attention_user_id_list), user_index))
logging.info(
"duan add,attention_user_id_list:%s,user_index:%d" % (str(attention_user_id_list), user_index))
ret_dict[str(attention_user_id_list[user_index])] = recommend_user_list
user_index += 1
logging.info("duan add,ret_dict:%s" % str(ret_dict))
return ret_dict
except:
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment