Commit 89424ef4 authored by 赵威

save data

parent 873ab3f3
......@@ -10,7 +10,7 @@ from utils.es import es_query
from utils.images import face_to_vec, url_to_ndarray
def save_diary_image_info(file, face_to_vec_f):
def save_diary_image_info(save_file, face_to_vec_f):
q = {
"query": {
"bool": {
......@@ -38,7 +38,7 @@ def save_diary_image_info(file, face_to_vec_f):
}
}
with open(file, "w"):
with open(save_file, "w") as f:
step = 100
# for i in range(0, 500000, step):
res_dict = es_query("diary", q, 0, step)
......@@ -47,18 +47,33 @@ def save_diary_image_info(file, face_to_vec_f):
diary_id = item["_source"]["id"]
before_cover_url = item["_source"]["before_cover_url"] + "-w"
after_cover_url = item["_source"]["after_cover_url"] + "-w"
# print(str(diary_id) + " " + after_cover_url)
img = url_to_ndarray(after_cover_url)
if img.any():
faces = face_to_vec_f(img)
for face in faces:
line = str(diary_id) + "\t" + face["feature"] + "\t" + after_cover_url + "\n"
print(line)
line = str(diary_id) + "\t" + face["feature"] + "\n"
# print(line)
f.write(line)
def main():
img_url = "https://pic.igengmei.com/2020/07/03/1437/1b9975bb0b81-w"
img = url_to_ndarray(img_url)
def save_faiss_index(load_file, save_path, dim=128, hnsw_m=32):
    """Build a faiss HNSW index from a TSV file of vectors and write it to disk.

    Each non-blank line of *load_file* is split on tabs: the first column is
    parsed as an integer id and the second column as a JSON array holding the
    vector. Any further columns are ignored.

    Args:
        load_file: Path of the tab-separated vector file to read.
        save_path: Path the serialized faiss index is written to.
        dim: Expected length of every vector; passed to
            ``faiss.IndexHNSWFlat`` (default 128, the previously hard-coded
            value).
        hnsw_m: Second argument passed to ``faiss.IndexHNSWFlat``
            (default 32, the previously hard-coded value).

    Raises:
        ValueError: If the file yields no vectors or the vectors do not form
            an ``(n, dim)`` matrix.
    """
    ids = []
    features = []
    with open(load_file, "r") as f:
        # Iterate lazily instead of materializing every line with readlines().
        for line in f:
            if not line.strip():
                continue  # tolerate blank / trailing empty lines
            columns = line.split("\t")
            ids.append(int(columns[0]))
            features.append(json.loads(columns[1]))

    # faiss takes float32 vectors and int64 ids. The previous code had the two
    # dtypes swapped, which truncated the embeddings to integers and handed
    # faiss floating-point ids.
    ids_np = np.array(ids, dtype="int64")
    features_np = np.array(features, dtype="float32")
    if features_np.ndim != 2 or features_np.shape[1] != dim:
        raise ValueError(
            "expected a non-empty (n, {}) feature matrix from {!r}, got shape {}".format(
                dim, load_file, features_np.shape
            )
        )

    # Wrap the HNSW index in an IDMap so vectors are stored under the ids
    # read from the file rather than sequential positions.
    index = faiss.IndexIDMap(faiss.IndexHNSWFlat(dim, hnsw_m))
    index.add_with_ids(features_np, ids_np)
    faiss.write_index(index, save_path)
def main():
base_dir = os.getcwd()
print("base_dir: " + base_dir)
model_diry = os.path.join(base_dir, "_models")
......@@ -74,7 +89,10 @@ def main():
face_to_vec_f = lambda img: face_to_vec(img, face_rec, face_detector, shape_predictor)
save_diary_image_info(diary_after_cover_vec_file, face_to_vec_f)
save_faiss_index(diary_after_cover_vec_file, faiss_index_path)
# img_url = "https://pic.igengmei.com/2020/07/03/1437/1b9975bb0b81-w"
# img = url_to_ndarray(img_url)
# if img.any():
# faces = face_to_vec_f(img)
......@@ -109,4 +127,4 @@ if __name__ == "__main__":
main()
print("total cost: {:.2f}s".format(time.time() - begin_time))
# Convert the elapsed seconds to minutes *before* formatting; dividing the
# already-formatted string by 60 raises TypeError.
print("total cost: {:.2f}mins".format((time.time() - begin_time) / 60))
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment