Commit 3ca23564 authored by 王志伟
parents 17b1f603 ae14fda6
......@@ -323,7 +323,7 @@ object EsmmPredData {
val union_data_scity_id = sc.sql(
s"""
|select a.stat_date,a.device_id,a.ucity_id,a.cid_id,a.diary_service_id,a.y,a.z,a.clevel1_id,a.slevel1_id,a.ccity_name,
|select distinct a.stat_date,a.device_id,a.ucity_id,a.cid_id,a.diary_service_id,a.y,a.z,a.clevel1_id,a.slevel1_id,a.ccity_name,
| d.city_id as scity_id
|from union_data_ccity_name a
|left join online.tl_meigou_service_view b on a.diary_service_id=b.id
......
......@@ -35,8 +35,8 @@ def get_data():
print("click data ok")
# print(temp.head())
df = pd.merge(esmm,temp,on = "device_id",how='left').fillna(0)
# print(df.shape)
print("合并后:")
print(df.shape)
df["diary_service_id"] = df["diary_service_id"].astype("str")
df["clevel1_id"] = df["clevel1_id"].astype("str")
......@@ -67,8 +67,11 @@ def transform(df):
df = df.drop(0, axis=1).drop("seq",axis=1)
print("size")
print(df.shape)
train = df[df["stat_date"] != "2018-11-25"].drop("stat_date",axis=1)
test = df[df["stat_date"] == "2018-11-25"].drop("stat_date",axis=1)
print(df.head())
train = df[df["stat_date"] != "2018-11-25"]
train = train.drop("stat_date",axis=1)
test = df[df["stat_date"] == "2018-11-25"]
test = test.drop("stat_date",axis=1)
train.to_csv(path+"train.csv",index=None)
test.to_csv(path + "test.csv", index=None)
# yconnect = create_engine('mysql+pymysql://root:3SYz54LS9#^9sBvC@10.66.157.22:4000/jerry_test?charset=utf8')
......@@ -194,5 +197,5 @@ class multiFFMFormatPandas:
if __name__ == "__main__":
path = "/data/ffm/"
path = "/home/gmuser/ffm/"
get_data()
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment