Commit 223dcbc2 authored by 赵威 (Zhao Wei)

time logger

parent 0f9626e6
......@@ -34,7 +34,8 @@ def get_spark(app_name=""):
"spark.sql.extensions",
"org.apache.spark.sql.TiExtensions").config("spark.tispark.pd.addresses",
"172.16.40.170:2379").appName(app_name).enableHiveSupport().getOrCreate()
# sc = spark.sparkContext
sc = spark.sparkContext
sc.setLogLevel("WARN")
# sc.addPyFile("/srv/apps/strategy_embedding/utils/date.py")
ti = pti.TiContext(spark)
ti.tidbMapDatabase("jerry_test")
......@@ -237,12 +238,13 @@ def get_exposure_data(spark, card_type, start, end):
if __name__ == "__main__":
    # Entry point: pull click and exposure data for tractate ("user_post")
    # cards over a date window and preview the results.
    spark = get_spark("dssm_tractate_data")
    card_type = "user_post"
    # TODO: widen the window to 30 days once validated (currently 5 days
    # for faster iteration).
    start, end = get_ndays_before_no_minus(5), get_ndays_before_no_minus(1)
    click_df = get_click_data(spark, card_type, start, end)
    # show(5, False): print 5 rows without truncating column values.
    click_df.show(5, False)
    exposure_df = get_exposure_data(spark, card_type, start, end)
    exposure_df.show(5, False)
    # spark-submit --master yarn --deploy-mode client --queue root.strategy --driver-memory 16g --executor-memory 1g --executor-cores 1 --num-executors 70 --conf spark.default.parallelism=100 --conf spark.storage.memoryFraction=0.5 --conf spark.shuffle.memoryFraction=0.3 --conf spark.locality.wait=0 --jars /srv/apps/tispark-core-2.1-SNAPSHOT-jar-with-dependencies.jar,/srv/apps/spark-connector_2.11-1.9.0-rc2.jar,/srv/apps/mysql-connector-java-5.1.38.jar /srv/apps/strategy_embedding/dssm/get_tractate_data.py
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment