Commit 223dcbc2 authored by 赵威

time logger

parent 0f9626e6
@@ -34,7 +34,8 @@ def get_spark(app_name=""):
         "spark.sql.extensions",
         "org.apache.spark.sql.TiExtensions").config("spark.tispark.pd.addresses",
                                                     "172.16.40.170:2379").appName(app_name).enableHiveSupport().getOrCreate()
-    # sc = spark.sparkContext
+    sc = spark.sparkContext
+    sc.setLogLevel("WARN")
     # sc.addPyFile("/srv/apps/strategy_embedding/utils/date.py")
     ti = pti.TiContext(spark)
     ti.tidbMapDatabase("jerry_test")
@@ -237,12 +238,13 @@ def get_exposure_data(spark, card_type, start, end):
 if __name__ == "__main__":
     spark = get_spark("dssm_tractate_data")
-    card_tye = "user_post"
-    start, end = get_ndays_before_no_minus(180), get_ndays_before_no_minus(1)
-    click_df = get_click_data(spark, card_tye, start, end)
+    card_type = "user_post"
+    # TODO days 30
+    start, end = get_ndays_before_no_minus(5), get_ndays_before_no_minus(1)
+    click_df = get_click_data(spark, card_type, start, end)
     click_df.show(5, False)
-    exposure_df = get_exposure_data(spark, card_tye, start, end)
+    exposure_df = get_exposure_data(spark, card_type, start, end)
     exposure_df.show(5, False)
     # spark-submit --master yarn --deploy-mode client --queue root.strategy --driver-memory 16g --executor-memory 1g --executor-cores 1 --num-executors 70 --conf spark.default.parallelism=100 --conf spark.storage.memoryFraction=0.5 --conf spark.shuffle.memoryFraction=0.3 --conf spark.locality.wait=0 --jars /srv/apps/tispark-core-2.1-SNAPSHOT-jar-with-dependencies.jar,/srv/apps/spark-connector_2.11-1.9.0-rc2.jar,/srv/apps/mysql-connector-java-5.1.38.jar /srv/apps/strategy_embedding/dssm/get_tractate_data.py
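This hunk fixes the `card_tye` typo and shrinks the query window from 180 days to 5, with a TODO to widen it to 30. The date helper itself is not part of this diff; the sketch below is a hypothetical stand-in for what `get_ndays_before_no_minus` appears to do (return a date N days back as a `YYYYMMDD` string with no dash separators), shown only to clarify how `start` and `end` are built.

```python
from datetime import datetime, timedelta

# Hypothetical stand-in for utils/date.get_ndays_before_no_minus;
# the real helper may differ in timezone handling or exact format.
def get_ndays_before_no_minus(n):
    return (datetime.now() - timedelta(days=n)).strftime("%Y%m%d")

# As in the new code: a 5-day window ending yesterday,
# with the TODO suggesting a 30-day window later.
start, end = get_ndays_before_no_minus(5), get_ndays_before_no_minus(1)
print(start, end)  # e.g. 20200101 20200105 (start is the older date)
```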