Commit 68af57e1 authored by 赵威

get online ids

parent e6a57b8d
......@@ -156,7 +156,7 @@ def get_diary_info_from_es(fields=["id"]):
return results
def get_offline_ids(content_type):
def get_online_ids(content_type):
    """Scan Elasticsearch for the ``content_type`` documents that are online.

    Args:
        content_type: Forwarded unchanged as the first argument of
            ``es_scan`` (presumably the index / document type name --
            confirm against ``es_scan``).

    Returns:
        Whatever ``es_scan`` returns for the query, unmodified.
    """
    # The filter has to agree with the function name: this function was
    # renamed from get_offline_ids, so the term must select online documents
    # (is_online == True) rather than keep the old offline filter.
    query = {"query": {"bool": {"must": [{"term": {"is_online": True}}]}}}
    return es_scan(content_type, query)
......@@ -8,7 +8,7 @@ sys.path.append(os.path.realpath("."))
from gensim.models import Word2Vec, word2vec
from utils.date import get_ndays_before_no_minus, get_ndays_before_with_format
from utils.es import get_offline_ids
from utils.es import get_online_ids
from utils.files import DATA_PATH, MODEL_PATH
from utils.spark import get_spark
......@@ -124,10 +124,10 @@ def get_answer_click_data(spark, start, end):
return df
def get_offline_answer_ids():
def get_online_answer_ids():
count = 0
res_set = set([])
for item in get_offline_ids("answer"):
for item in get_online_ids("answer"):
count += 1
try:
print(count)
......@@ -170,7 +170,7 @@ def save_clicked_answer_ids_item2vec():
if __name__ == "__main__":
begin_time = time.time()
res_set = get_offline_answer_ids()
res_set = get_online_answer_ids()
print(len(res_set))
# spark = get_spark("answer_click_ids")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment