Commit 5b81b3e7 authored by 赵威

get tractate data

parent 42d5aeb3
......@@ -25,7 +25,7 @@ pymysql==0.10.1
gensim==3.8.3
# pyspark==2.3.3
pyspark==2.3.3
pytispark==2.0
ipython
from collections import defaultdict
from datetime import date, timedelta
from pyspark import SparkConf
......@@ -135,16 +136,24 @@ def get_tracate_click_data(spark, start, end):
""".format(start, end, reg, start, end, end, start, end, end, end, end, end)
# print("sql", flush=True)
# print(sql, flush=True)
return spark.sql(sql)
df = spark.sql(sql)
return df
def get_device_click_tractate_ids():
    """Placeholder with no behavior; always returns ``None``.

    This zero-argument stub is immediately followed by a one-argument
    definition of the same name, which rebinds the name when the module is
    executed, so this version is never the one callers reach.
    """
    pass
def get_device_click_tractate_ids(click_df, limit=100):
    """Group clicked ``card_id`` values by ``cl_id``.

    The rows of ``click_df`` are sorted by ``partition_date`` in descending
    order, optionally capped at ``limit`` rows, collected to the driver and
    folded into a mapping.

    Args:
        click_df: DataFrame-like object exposing ``orderBy``, ``limit`` and
            ``collect``, whose rows can be indexed by the column names
            ``cl_id``, ``card_id`` and ``partition_date``.
            NOTE(review): ``cl_id`` is presumably the device id and
            ``card_id`` the tractate id -- confirm against the SQL that
            builds this frame.
        limit: Maximum number of rows (after sorting) to collect. Defaults
            to 100, the value that used to be hard-coded. Pass ``None`` to
            collect every row.

    Returns:
        A ``defaultdict(list)`` mapping each ``cl_id`` to its ``card_id``
        values, in the order the sorted rows were returned.
    """
    ordered = click_df.orderBy("partition_date", ascending=False)
    if limit is not None:
        # Cap the rows before collect() so only that many reach the driver.
        ordered = ordered.limit(limit)

    res = defaultdict(list)
    for row in ordered.collect():
        res[row["cl_id"]].append(row["card_id"])
    return res
if __name__ == "__main__":
    # Manual smoke run: build a session, pull click data, print a sample,
    # then print the per-cl_id grouping.
    spark = get_spark("test")

    # Narrow window query.
    # NOTE(review): get_ndays_before_no_minus(n) presumably returns the date
    # n days ago formatted without dashes -- confirm against its definition.
    click_data = get_tracate_click_data(spark, get_ndays_before_no_minus(2), get_ndays_before_no_minus(1))
    # Show the first 5 rows; False disables column truncation.
    click_data.show(5, False)

    # TODO 30 days
    click_df = get_tracate_click_data(spark, get_ndays_before_no_minus(5), get_ndays_before_no_minus(1))
    click_df.show(5, False)

    res_dict = get_device_click_tractate_ids(click_df)
    print(res_dict)
# /opt/spark/bin/spark-submit --master yarn --deploy-mode client --queue root.strategy --driver-memory 16g --executor-memory 1g --executor-cores 1 --num-executors 70 --conf spark.default.parallelism=100 --conf spark.storage.memoryFraction=0.5 --conf spark.shuffle.memoryFraction=0.3 --conf spark.locality.wait=0 --jars /srv/apps/tispark-core-2.1-SNAPSHOT-jar-with-dependencies.jar,/srv/apps/spark-connector_2.11-1.9.0-rc2.jar,/srv/apps/mysql-connector-java-5.1.38.jar /srv/apps/strategy_embedding/word_vector/tractate.py
# spark-submit --master yarn --deploy-mode client --queue root.strategy --driver-memory 16g --executor-memory 1g --executor-cores 1 --num-executors 70 --conf spark.default.parallelism=100 --conf spark.storage.memoryFraction=0.5 --conf spark.shuffle.memoryFraction=0.3 --conf spark.locality.wait=0 --jars /srv/apps/tispark-core-2.1-SNAPSHOT-jar-with-dependencies.jar,/srv/apps/spark-connector_2.11-1.9.0-rc2.jar,/srv/apps/mysql-connector-java-5.1.38.jar /srv/apps/strategy_embedding/word_vector/tractate.py
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment