Commit 6e43d520 authored by 赵威

get print data

parent 787f0e4f
......@@ -6,12 +6,16 @@ from collections import defaultdict
sys.path.append(os.path.realpath("."))
import random
from gensim.models import Word2Vec, word2vec
from utils.date import get_ndays_before_no_minus, get_ndays_before_with_format
from utils.date import (get_ndays_before_no_minus,
get_ndays_before_with_format)
from utils.es import get_online_ids
from utils.files import DATA_PATH, MODEL_PATH
from utils.spark import get_spark
answer_click_ids_model_path = os.path.join(MODEL_PATH, "answer_click_ids_item2vec_model")
try:
......@@ -31,7 +35,7 @@ def get_answer_click_data(spark, start, end):
where action = 'page_view'
AND partition_date BETWEEN '{}' AND '{}'
AND page_name='answer_detail'
AND page_stay>=1
AND page_stay>=0.5
AND cl_id is not null
AND cl_id != ''
AND business_id is not null
......@@ -179,6 +183,7 @@ if __name__ == "__main__":
print(click_df.count())
res_dict = get_device_click_answer_ids_dict(click_df)
print(random.sample(res_dict.items(), 3))
with open(os.path.join(DATA_PATH, "click_answer_ids.csv"), "w") as f:
for (k, v) in res_dict.items():
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment