Commit e916ae5c authored by 王志伟
parents 9cde0cf4 eecd0fdd
......@@ -75,6 +75,7 @@ def con_sql(db,sql):
db.close()
return df
def feature_engineer():
apps_number, app_list_map, level2_number, leve2_map, level3_number, leve3_map = get_map()
unique_values = []
......
......@@ -8,7 +8,7 @@ export CLASSPATH="/opt/hadoop/etc/hadoop:/opt/hadoop/share/hadoop/common/lib/api
echo $CLASSPATH
export LD_LIBRARY_PATH="/usr/lib/jvm/java-8-openjdk-amd64/jre/lib/amd64/server:/opt/hadoop/lib/native"
echo $LD_LIBRARY_PATH
export PATH=$PATH:/usr/local/hadoop/bin/
echo "rm model file"
rm -r ${LOCAL_PATH}/model_ckpt/DeepCvrMTL/20*
......@@ -29,4 +29,6 @@ echo "sort and 2sql"
${PYTHON_PATH} ${MODEL_PATH}/to_database.py > "/home/gmuser/esmm/log/insert_$b.log"
echo "delete files"
rm /home/gmuser/esmm/*.csv
rm /home/gmuser/esmm/native/*
rm /home/gmuser/esmm/nearby/*
\ No newline at end of file
......@@ -19,12 +19,7 @@ def con_sql(sql):
return result
def nearby_set_join(lst):
# return ','.join([str(i) for i in list(lst)])
return ','.join([str(i) for i in lst.unique().tolist()])
def native_set_join(lst):
def set_join(lst):
l = lst.unique().tolist()
r = [str(i) for i in l]
r =r[:500]
......@@ -39,7 +34,8 @@ def main():
df1 = pd.read_csv(path+"/native/pred.txt",sep='\t',header=None,names=["ctr","cvr","ctcvr"])
df2["ctr"],df2["cvr"],df2["ctcvr"] = df1["ctr"],df1["cvr"],df1["ctcvr"]
df3 = df2.groupby(by=["uid","city"]).apply(lambda x: x.sort_values(by="ctcvr",ascending=False)).reset_index(drop=True).groupby(by=["uid","city"]).agg({'cid_id':native_set_join}).reset_index(drop=False)
df3 = df2.groupby(by=["uid","city"]).apply(lambda x: x.sort_values(by="ctcvr",ascending=False))\
.reset_index(drop=True).groupby(by=["uid","city"]).agg({'cid_id':set_join}).reset_index(drop=False)
df3.columns = ["device_id","city_id","native_queue"]
print("native_device_count",df3.shape)
......@@ -50,7 +46,8 @@ def main():
df1 = pd.read_csv(path+"/nearby/pred.txt",sep='\t',header=None,names=["ctr","cvr","ctcvr"])
df2["ctr"], df2["cvr"], df2["ctcvr"] = df1["ctr"], df1["cvr"], df1["ctcvr"]
df4 = df2.groupby(by=["uid","city"]).apply(lambda x: x.sort_values(by="ctcvr",ascending=False)).reset_index(drop=True).groupby(by=["uid","city"]).agg({'cid_id':nearby_set_join}).reset_index(drop=False)
df4 = df2.groupby(by=["uid","city"]).apply(lambda x: x.sort_values(by="ctcvr",ascending=False))\
.reset_index(drop=True).groupby(by=["uid","city"]).agg({'cid_id':set_join}).reset_index(drop=False)
df4.columns = ["device_id","city_id","nearby_queue"]
print("nearby_device_count",df4.shape)
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment