Commit 7ff8f681 authored by 赵威

get device data

parent a720cfdb
@@ -19,6 +19,10 @@ def get_ndays_before_no_minus(n):
    return get_ndays_before_with_format(n, "%Y%m%d")

def get_ndays_before(n):
    return get_ndays_before_with_format(n, "%Y-%m-%d")

def connect_doris(spark, table):
    return spark.read.format("jdbc") \
        .option("driver", "com.mysql.jdbc.Driver") \
@@ -62,6 +66,9 @@ def get_spark(app_name=""):
    return spark
### get data
def get_click_data(spark, card_type, start, end):
    reg = r"""^\\d+$"""
    sql = """
@@ -369,6 +376,15 @@ def get_card_feature_df(spark, card_type, yesterday):
    return spark.sql(sql)
def get_device_tags(spark):
    sql = """
        SELECT cl_id, first_demands, first_solutions, first_positions, second_demands, second_solutions, second_positions, projects
        FROM user_tag3_portrait
        WHERE date = '{}'
    """.format(get_ndays_before(1))
    return spark.sql(sql)
if __name__ == "__main__":
    spark = get_spark("dssm_tractate_data")
    card_type = "user_post"
@@ -383,4 +399,7 @@ if __name__ == "__main__":
    tractate_feature_df = get_card_feature_df(spark, card_type, end)
    tractate_feature_df.show(5, False)

    device_feature_df = get_device_tags(spark)
    device_feature_df.show(5, False)

# spark-submit --master yarn --deploy-mode client --queue root.strategy --driver-memory 16g --executor-memory 1g --executor-cores 1 --num-executors 70 --conf spark.default.parallelism=100 --conf spark.storage.memoryFraction=0.5 --conf spark.shuffle.memoryFraction=0.3 --conf spark.locality.wait=0 --jars /srv/apps/tispark-core-2.1-SNAPSHOT-jar-with-dependencies.jar,/srv/apps/spark-connector_2.11-1.9.0-rc2.jar,/srv/apps/mysql-connector-java-5.1.38.jar /srv/apps/strategy_embedding/dssm/get_tractate_data.py
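Two hedged sketches, not part of this commit. First, get_ndays_before_with_format is defined outside the hunks shown above; assuming it is a plain datetime helper, it presumably looks roughly like this:

from datetime import datetime, timedelta

def get_ndays_before_with_format(n, time_format):
    # Presumed shape of the helper referenced above: format the date n days ago.
    return (datetime.now() - timedelta(days=n)).strftime(time_format)

Second, a hypothetical helper (device_tags_to_dict is not in the repo) showing how the device_feature_df returned by get_device_tags could be collected into a per-device lookup for downstream feature assembly, assuming the tag table is small enough to fit on the driver:

def device_tags_to_dict(device_feature_df):
    # Collect the device-tag DataFrame into {cl_id: row-as-dict} for fast lookup.
    # Assumption: the result fits in driver memory; otherwise keep it as a DataFrame.
    return {row["cl_id"]: row.asDict() for row in device_feature_df.collect()}

# e.g.
# device_dict = device_tags_to_dict(device_feature_df)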