Commit e0f8a405 authored by 赵威's avatar 赵威

scan es

parent 796fe29d
...@@ -8,7 +8,7 @@ import faiss ...@@ -8,7 +8,7 @@ import faiss
import numpy as np import numpy as np
from gm_rpcd.all import bind from gm_rpcd.all import bind
from utils.cache import redis_client3 from utils.cache import redis_client3
from utils.es import es_query from utils.es import es_query, es_scan
from utils.images import face_to_vec, url_to_ndarray from utils.images import face_to_vec, url_to_ndarray
base_dir = os.getcwd() base_dir = os.getcwd()
...@@ -76,21 +76,25 @@ def save_diary_image_info(save_file): ...@@ -76,21 +76,25 @@ def save_diary_image_info(save_file):
} }
} }
count = 0 count = 0
results = es_scan("diary", q)
with open(save_file, "w") as f: with open(save_file, "w") as f:
res_dict = es_query("diary", q, offset=0, size=200000) for item in results:
for item in res_dict["hits"]["hits"]: count += 1
diary_id = item["_source"]["id"] print(item["_id"], item["_source"]["after_cover_url"] + "-w", + str(count))
# before_cover_url = item["_source"]["before_cover_url"] + "-w" # res_dict = es_query("diary", q, offset=0, size=200000)
after_cover_url = item["_source"]["after_cover_url"] + "-w" # for item in res_dict["hits"]["hits"]:
img = url_to_ndarray(after_cover_url) # diary_id = item["_source"]["id"]
if img.any(): # # before_cover_url = item["_source"]["before_cover_url"] + "-w"
count += 1 # after_cover_url = item["_source"]["after_cover_url"] + "-w"
print("count: " + str(count) + " " + str(diary_id)) # img = url_to_ndarray(after_cover_url)
faces = FACE_TO_VEC_FUN(img) # if img.any():
for face in faces: # count += 1
line = str(diary_id) + "\t" + face["feature"] + "\n" # print("count: " + str(count) + " " + str(diary_id))
# print(line) # faces = FACE_TO_VEC_FUN(img)
f.write(line) # for face in faces:
# line = str(diary_id) + "\t" + face["feature"] + "\n"
# # print(line)
# f.write(line)
def save_faiss_index(load_file, save_path): def save_faiss_index(load_file, save_path):
...@@ -178,48 +182,48 @@ def get_similar_diary_ids_by_face_features(feature, limit=0.1): ...@@ -178,48 +182,48 @@ def get_similar_diary_ids_by_face_features(feature, limit=0.1):
def main(): def main():
# save_diary_image_info(diary_after_cover_vec_file) save_diary_image_info(diary_after_cover_vec_file)
# save_faiss_index(diary_after_cover_vec_file, faiss_index_path) # save_faiss_index(diary_after_cover_vec_file, faiss_index_path)
imgs = [ # imgs = [
"https://pic.igengmei.com/2020/07/03/1437/1b9975bb0b81-w", "https://pic.igengmei.com/2020/07/01/1812/ca64827a83da-w", # "https://pic.igengmei.com/2020/07/03/1437/1b9975bb0b81-w", "https://pic.igengmei.com/2020/07/01/1812/ca64827a83da-w",
"https://pic.igengmei.com/2020/07/04/1711/24f4131a9b1e-w", "https://pic.igengmei.com/2020/07/04/1507/e17a995be219-w" # "https://pic.igengmei.com/2020/07/04/1711/24f4131a9b1e-w", "https://pic.igengmei.com/2020/07/04/1507/e17a995be219-w"
] # ]
for img_url in imgs: # for img_url in imgs:
res = get_similar_diary_ids_by_url(img_url, limit=0.18232107) # res = get_similar_diary_ids_by_url(img_url, limit=0.18232107)
print(res) # print(res)
print("@@@@@@@@") # print("@@@@@@@@")
a = [ # a = [
-0.08361373096704483, 0.06760436296463013, 0.10752949863672256, -0.020746365189552307, -0.07035162299871445, # -0.08361373096704483, 0.06760436296463013, 0.10752949863672256, -0.020746365189552307, -0.07035162299871445,
-0.014547230675816536, -0.043201886117458344, -0.12196271121501923, 0.13929598033428192, -0.1360183209180832, # -0.014547230675816536, -0.043201886117458344, -0.12196271121501923, 0.13929598033428192, -0.1360183209180832,
0.23247791826725006, -0.08867999166250229, -0.24177594482898712, -0.05600903555750847, -0.05371646583080292, # 0.23247791826725006, -0.08867999166250229, -0.24177594482898712, -0.05600903555750847, -0.05371646583080292,
0.22015368938446045, -0.12883149087429047, -0.0822330191731453, -0.0413128100335598, 0.08704500645399094, # 0.22015368938446045, -0.12883149087429047, -0.0822330191731453, -0.0413128100335598, 0.08704500645399094,
0.10081718862056732, -0.03764188289642334, 0.036720920354127884, 0.04766431450843811, -0.0685625970363617, # 0.10081718862056732, -0.03764188289642334, 0.036720920354127884, 0.04766431450843811, -0.0685625970363617,
-0.38336044549942017, -0.10978807508945465, -0.07328074425458908, -0.023904308676719666, -0.007438751868903637, # -0.38336044549942017, -0.10978807508945465, -0.07328074425458908, -0.023904308676719666, -0.007438751868903637,
-0.09545779973268509, 0.027364756911993027, -0.1537190079689026, -0.04008519649505615, -0.03581209108233452, # -0.09545779973268509, 0.027364756911993027, -0.1537190079689026, -0.04008519649505615, -0.03581209108233452,
0.04322449117898941, -0.05686069279909134, -0.11610691249370575, 0.1640746295452118, -0.004643512889742851, # 0.04322449117898941, -0.05686069279909134, -0.11610691249370575, 0.1640746295452118, -0.004643512889742851,
-0.34821364283561707, 0.03711444139480591, -0.0026186704635620117, 0.1917344480752945, 0.14298999309539795, # -0.34821364283561707, 0.03711444139480591, -0.0026186704635620117, 0.1917344480752945, 0.14298999309539795,
0.04084448516368866, 0.06119539216160774, -0.12611950933933258, 0.10941470414400101, -0.20786598324775696, # 0.04084448516368866, 0.06119539216160774, -0.12611950933933258, 0.10941470414400101, -0.20786598324775696,
0.03435457497835159, 0.11412393301725388, 0.0602775476872921, 0.054409340023994446, -0.002967053558677435, # 0.03435457497835159, 0.11412393301725388, 0.0602775476872921, 0.054409340023994446, -0.002967053558677435,
-0.12524624168872833, 0.026284342631697655, 0.08236880600452423, -0.10654348134994507, 0.00403654295951128, # -0.12524624168872833, 0.026284342631697655, 0.08236880600452423, -0.10654348134994507, 0.00403654295951128,
0.10716681182384491, -0.08270247280597687, 0.018992319703102112, -0.11595900356769562, 0.18344789743423462, # 0.10716681182384491, -0.08270247280597687, 0.018992319703102112, -0.11595900356769562, 0.18344789743423462,
0.0895184576511383, -0.1307670772075653, -0.15750591456890106, 0.11103398352861404, -0.13521818816661835, # 0.0895184576511383, -0.1307670772075653, -0.15750591456890106, 0.11103398352861404, -0.13521818816661835,
-0.03199139982461929, 0.11129119992256165, -0.17407448589801788, -0.20658859610557556, -0.3114454746246338, # -0.03199139982461929, 0.11129119992256165, -0.17407448589801788, -0.20658859610557556, -0.3114454746246338,
0.01914297416806221, 0.39955294132232666, 0.12365783005952835, -0.14545315504074097, -0.03254598751664162, # 0.01914297416806221, 0.39955294132232666, 0.12365783005952835, -0.14545315504074097, -0.03254598751664162,
-0.10342024266719818, 0.03375910595059395, 0.11272192746400833, 0.21788232028484344, 0.08588762581348419, # -0.10342024266719818, 0.03375910595059395, 0.11272192746400833, 0.21788232028484344, 0.08588762581348419,
0.012640122324228287, -0.07646650820970535, -0.043292030692100525, 0.21306097507476807, -0.12407292425632477, # 0.012640122324228287, -0.07646650820970535, -0.043292030692100525, 0.21306097507476807, -0.12407292425632477,
-0.025112995877861977, 0.2634827196598053, 0.005047444254159927, 0.06562616676092148, -0.07397496700286865, # -0.025112995877861977, 0.2634827196598053, 0.005047444254159927, 0.06562616676092148, -0.07397496700286865,
0.06206338107585907, -0.0634055882692337, 0.05882266163825989, -0.05909111723303795, 0.027562778443098068, # 0.06206338107585907, -0.0634055882692337, 0.05882266163825989, -0.05909111723303795, 0.027562778443098068,
0.043835900723934174, 0.00407575536519289, -0.007656056433916092, 0.1048622876405716, -0.17822585999965668, # 0.043835900723934174, 0.00407575536519289, -0.007656056433916092, 0.1048622876405716, -0.17822585999965668,
0.1303984671831131, -0.021631652489304543, 0.0836174339056015, 0.11956407874822617, 0.007379574701189995, # 0.1303984671831131, -0.021631652489304543, 0.0836174339056015, 0.11956407874822617, 0.007379574701189995,
-0.07777556777000427, -0.08474794030189514, 0.09585978090763092, -0.21120299398899078, 0.1435444951057434, # -0.07777556777000427, -0.08474794030189514, 0.09585978090763092, -0.21120299398899078, 0.1435444951057434,
0.19884724915027618, 0.07154559344053268, 0.06259742379188538, 0.10118959099054337, 0.10188969224691391, # 0.19884724915027618, 0.07154559344053268, 0.06259742379188538, 0.10118959099054337, 0.10188969224691391,
-0.015351934358477592, -0.04335442930459976, -0.26258283853530884, -0.021509556099772453, 0.12185295671224594, # -0.015351934358477592, -0.04335442930459976, -0.26258283853530884, -0.021509556099772453, 0.12185295671224594,
-0.011788002215325832, 0.01337978895753622, -0.008025042712688446 # -0.011788002215325832, 0.01337978895753622, -0.008025042712688446
] # ]
res = get_similar_diary_ids_by_face_features(a) # res = get_similar_diary_ids_by_face_features(a)
print(res) # print(res)
if __name__ == "__main__": if __name__ == "__main__":
......
...@@ -43,4 +43,4 @@ def es_scan(doc, body, es=None, rw="read"): ...@@ -43,4 +43,4 @@ def es_scan(doc, body, es=None, rw="read"):
if es is None: if es is None:
es = get_es() es = get_es()
index = es_index_adapt(index_prefix="gm-dbmw", doc_type=doc, rw=rw) index = es_index_adapt(index_prefix="gm-dbmw", doc_type=doc, rw=rw)
return helpers.scan(es, index=index, query=body) return helpers.scan(es, index=index, query=body, request_timeout=100, scroll="300m", raise_on_error=False)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment