Commit 2f8b47e9 authored by 张彦钊

修改测试文件

parent 3010cb4d
......@@ -39,8 +39,6 @@ def feature_engineer():
"where e.stat_date >= '{}'".format(start)
df = spark.sql(sql)
print(df.count())
df.show(6)
url = "jdbc:mysql://172.16.30.143:3306/zhengxing"
jdbcDF = spark.read.format("jdbc").option("driver", "com.mysql.jdbc.Driver").option("url", url) \
......@@ -55,19 +53,11 @@ def feature_engineer():
hospital = spark.sql(sql)
df = df.join(hospital,"diary_service_id","left_outer").fillna("na")
df.show(6)
print(df.count())
df = df.drop(["level2","diary_service_id"])
# df = df.drop_duplicates(["ucity_id", "level2_ids", "ccity_name", "device_type", "manufacturer",
# "channel", "top", "time", "stat_date", "app_list", "hospital_id", "level3_ids"])
df = df.drop_duplicates(["ucity_id", "level2_ids", "ccity_name", "device_type", "manufacturer",
"channel", "top", "time", "stat_date", "app_list", "hospital_id", "level3_ids"])
print(df.count())
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment