Commit 568715d5 authored by 赵威

get tags

parent a5881d91
...@@ -19,6 +19,26 @@ def get_ndays_before_no_minus(n): ...@@ -19,6 +19,26 @@ def get_ndays_before_no_minus(n):
return get_ndays_before_with_format(n, "%Y%m%d") return get_ndays_before_with_format(n, "%Y%m%d")
def connect_doris(spark, table):
    """Load a Doris table through Spark's JDBC reader.

    The connection settings default to the values that used to be
    hard-coded in this function. Each can be overridden without a code
    change through an environment variable: ``DORIS_JDBC_URL``,
    ``DORIS_USER`` and ``DORIS_PASSWORD``.

    Args:
        spark: Object exposing ``read.format(...)``; presumably a
            ``SparkSession`` (TODO confirm against callers).
        table: Value passed as the JDBC ``dbtable`` option.

    Returns:
        Whatever ``.load()`` returns on the configured reader.
    """
    import os  # local import: the file's import block is not visible here

    # SECURITY: credentials committed to source control are a leak risk.
    # The literals below are kept only as backward-compatible fallbacks;
    # supply the environment variables in deployment and rotate the
    # password that has already been committed.
    url = os.environ.get(
        "DORIS_JDBC_URL", "jdbc:mysql://172.16.30.136:3306/doris_prod"
    )
    user = os.environ.get("DORIS_USER", "doris")
    password = os.environ.get("DORIS_PASSWORD", "o5gbA27hXHHm")

    # Options are applied in the same order as the original call chain.
    jdbc_options = (
        ("driver", "com.mysql.jdbc.Driver"),
        ("url", url),
        ("dbtable", table),
        ("user", user),
        ("password", password),
    )
    reader = spark.read.format("jdbc")
    for key, value in jdbc_options:
        reader = reader.option(key, value)
    return reader.load()
def get_content_tag3(spark, card_type):
    """Load the tag-v3 table that corresponds to ``card_type``.

    ``"diary"`` reads ``strategy_content_tagv3_info``, ``"user_post"``
    reads ``strategy_tractate_tagv3_info``, and every other value falls
    back to ``strategy_answer_tagv3_info``.

    Args:
        spark: Passed through unchanged to ``connect_doris``.
        card_type: Card kind used to select the table.

    Returns:
        The result of ``connect_doris`` for the selected table.
    """
    # Resolve the table name first so there is a single load call.
    if card_type == "diary":
        table_name = "strategy_content_tagv3_info"
    elif card_type == "user_post":
        table_name = "strategy_tractate_tagv3_info"
    else:
        # Any unrecognised card type is treated as an answer card.
        table_name = "strategy_answer_tagv3_info"
    return connect_doris(spark, table_name)
def get_spark(app_name=""): def get_spark(app_name=""):
sparkConf = SparkConf() sparkConf = SparkConf()
sparkConf.set("spark.sql.crossJoin.enabled", True) sparkConf.set("spark.sql.crossJoin.enabled", True)
...@@ -237,6 +257,9 @@ def get_exposure_data(spark, card_type, start, end): ...@@ -237,6 +257,9 @@ def get_exposure_data(spark, card_type, start, end):
def get_card_feature_df(spark, card_type, yesterday): def get_card_feature_df(spark, card_type, yesterday):
content_tag3 = get_content_tag3(spark, card_type)
content_tag3.createOrReplaceTempView("content_tag3")
reg = r"""^\\d+$""" reg = r"""^\\d+$"""
sql = """ sql = """
SELECT CAST(card_id as INT) as card_id, SELECT CAST(card_id as INT) as card_id,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment