Commit 7c856a21 authored by 高雅喆

esmm train data first level1_id

parent a0d42a6e
...@@ -264,7 +264,7 @@ def get_predict_set(ucity_id,model,ccity_name,manufacturer,channel): ...@@ -264,7 +264,7 @@ def get_predict_set(ucity_id,model,ccity_name,manufacturer,channel):
if __name__ == "__main__": if __name__ == "__main__":
path = "/home/gaoyazhe/data/" path = "/home/gmuser/esmm_data/"
a = time.time() a = time.time()
df, validate_date, ucity_id,ccity_name,manufacturer,channel = get_data() df, validate_date, ucity_id,ccity_name,manufacturer,channel = get_data()
model = transform(df, validate_date) model = transform(df, validate_date)
......
...@@ -11,7 +11,7 @@ my_user='gaoyazhe@igengmei.com' ...@@ -11,7 +11,7 @@ my_user='gaoyazhe@igengmei.com'
def mail(): def mail():
ret=True ret=True
try: try:
with open('/home/gaoyazhe/data/submit.log') as f: with open('/home/gmuser/esmm_data/submit.log') as f:
stat_data = f.read() stat_data = f.read()
msg=MIMEText(stat_data,'plain','utf-8') msg=MIMEText(stat_data,'plain','utf-8')
msg['From']=formataddr(["高雅喆",my_sender]) msg['From']=formataddr(["高雅喆",my_sender])
......
...@@ -25,10 +25,10 @@ def set_join(lst): ...@@ -25,10 +25,10 @@ def set_join(lst):
def main(): def main():
# native queue # native queue
df2 = pd.read_csv('/home/gaoyazhe/data/native.csv',usecols=[0,1,2],header=0,names=['uid','city','cid_id'],sep='\t') df2 = pd.read_csv('/home/gmuser/esmm_data/native.csv',usecols=[0,1,2],header=0,names=['uid','city','cid_id'],sep='\t')
df2['cid_id'] = df2['cid_id'].astype(str) df2['cid_id'] = df2['cid_id'].astype(str)
df1 = pd.read_csv("/home/gaoyazhe/data/native/pred.txt",sep='\t',header=None,names=["ctr","cvr","ctcvr"]) df1 = pd.read_csv("/home/gmuser/esmm_data/native/pred.txt",sep='\t',header=None,names=["ctr","cvr","ctcvr"])
df2["ctr"],df2["cvr"],df2["ctcvr"] = df1["ctr"],df1["cvr"],df1["ctcvr"] df2["ctr"],df2["cvr"],df2["ctcvr"] = df1["ctr"],df1["cvr"],df1["ctcvr"]
df3 = df2.groupby(by=["uid","city"]).apply(lambda x: x.sort_values(by="ctcvr",ascending=False)).reset_index(drop=True).groupby(by=["uid","city"]).agg({'cid_id':set_join}).reset_index(drop=False) df3 = df2.groupby(by=["uid","city"]).apply(lambda x: x.sort_values(by="ctcvr",ascending=False)).reset_index(drop=True).groupby(by=["uid","city"]).agg({'cid_id':set_join}).reset_index(drop=False)
df3.columns = ["device_id","city_id","native_queue"] df3.columns = ["device_id","city_id","native_queue"]
...@@ -36,10 +36,10 @@ def main(): ...@@ -36,10 +36,10 @@ def main():
# nearby queue # nearby queue
df2 = pd.read_csv('/home/gaoyazhe/data/nearby.csv',usecols=[0,1,2],header=0,names=['uid','city','cid_id'],sep='\t') df2 = pd.read_csv('/home/gmuser/esmm_data/nearby.csv',usecols=[0,1,2],header=0,names=['uid','city','cid_id'],sep='\t')
df2['cid_id'] = df2['cid_id'].astype(str) df2['cid_id'] = df2['cid_id'].astype(str)
df1 = pd.read_csv("/home/gaoyazhe/data/nearby/pred.txt",sep='\t',header=None,names=["ctr","cvr","ctcvr"]) df1 = pd.read_csv("/home/gmuser/esmm_data/nearby/pred.txt",sep='\t',header=None,names=["ctr","cvr","ctcvr"])
df2["ctr"], df2["cvr"], df2["ctcvr"] = df1["ctr"], df1["cvr"], df1["ctcvr"] df2["ctr"], df2["cvr"], df2["ctcvr"] = df1["ctr"], df1["cvr"], df1["ctcvr"]
df4 = df2.groupby(by=["uid","city"]).apply(lambda x: x.sort_values(by="ctcvr",ascending=False)).reset_index(drop=True).groupby(by=["uid","city"]).agg({'cid_id':set_join}).reset_index(drop=False) df4 = df2.groupby(by=["uid","city"]).apply(lambda x: x.sort_values(by="ctcvr",ascending=False)).reset_index(drop=True).groupby(by=["uid","city"]).agg({'cid_id':set_join}).reset_index(drop=False)
df4.columns = ["device_id","city_id","nearby_queue"] df4.columns = ["device_id","city_id","nearby_queue"]
......
#! /bin/bash #! /bin/bash
PYTHON_PATH=/home/gaoyazhe/miniconda3/bin/python PYTHON_PATH=/home/gaoyazhe/miniconda3/bin/python
MODEL_PATH=/srv/apps/ffm-baseline/eda/esmm MODEL_PATH=/srv/apps/ffm-baseline/eda/esmm
DATA_PATH=/home/gaoyazhe/data DATA_PATH=/home/gmuser/esmm_data
echo "start time" echo "start time"
current=$(date "+%Y-%m-%d %H:%M:%S") current=$(date "+%Y-%m-%d %H:%M:%S")
......
...@@ -209,7 +209,7 @@ object EsmmData { ...@@ -209,7 +209,7 @@ object EsmmData {
|and d.partition_date='${stat_date_not}' |and d.partition_date='${stat_date_not}'
""".stripMargin """.stripMargin
) )
// union_data_scity_id.createOrReplaceTempView("union_data_scity_id") union_data_scity_id.createOrReplaceTempView("union_data_scity_id")
union_data_scity_id.show() union_data_scity_id.show()
val union_data_scity_id2 = sc.sql( val union_data_scity_id2 = sc.sql(
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment