Commit 4dcaa94b authored by 高雅喆

change sort_and_2sql

parent 94c4bc5c
...@@ -345,7 +345,7 @@ def main(_): ...@@ -345,7 +345,7 @@ def main(_):
print("-"*100) print("-"*100)
with open(FLAGS.data_dir + "/pred.txt", "w") as fo: with open(FLAGS.data_dir + "/pred.txt", "w") as fo:
for prob in preds: for prob in preds:
fo.write("%f\t%f\n" % (prob['pctr'], prob['pcvr'])) fo.write("%f\t%f\t%f\n" % (prob['pctr'], prob['pcvr'], prob['pctcvr']))
elif FLAGS.task_type == 'export': elif FLAGS.task_type == 'export':
print("Not Implemented, Do It Yourself!") print("Not Implemented, Do It Yourself!")
#feature_spec = tf.feature_column.make_parse_example_spec(feature_columns) #feature_spec = tf.feature_column.make_parse_example_spec(feature_columns)
......
...@@ -22,36 +22,25 @@ def set_join(lst): ...@@ -22,36 +22,25 @@ def set_join(lst):
return ','.join(set(lst)) return ','.join(set(lst))
def main(): def main():
sql = "select device_id,city_id,cid from esmm_data2ffm_infer_native"
result = con_sql(sql) # native queue
dct = {"uid":[],"city":[],"cid_id":[]} df2 = pd.read_csv('/home/gaoyazhe/data/native.csv',usecols=[0,1,2],header=0,names=['uid','city','cid_id'],sep='\t')
for i in result:
dct["uid"].append(i[0]) df1 = pd.read_csv("/home/gaoyazhe/data/native/pred.txt",sep='\t',header=None,names=["ctr","cvr","ctcvr"])
dct["city"].append(i[1]) df2["ctr"],df2["cvr"],df2["ctcvr"] = df1["ctr"],df1["cvr"],df1["ctcvr"]
dct["cid_id"].append(i[2]) df3 = df2.groupby(by=["uid","city"]).apply(lambda x: x.sort_values(by="ctcvr",ascending=False)).reset_index(drop=True).groupby(by=["uid","city"]).agg({'cid_id':set_join}).reset_index(drop=False)
df1 = pd.read_csv("/home/gaoyazhe/data/native/pred.txt",sep='\t',header=None,names=["ctr","cvr"])
df2 = pd.DataFrame(dct)
df2["ctr"],df2["cvr"] = df1["ctr"],df1["cvr"]
df3 = df2.groupby(by=["uid","city"]).apply(lambda x: x.sort_values(by="cvr",ascending=False)).reset_index(drop=True).groupby(by=["uid","city"]).agg({'cid_id':set_join}).reset_index(drop=False)
ctime = int(time.time()) ctime = int(time.time())
df3["time"] = ctime df3["time"] = ctime
df3.columns = ["device_id","city_id","native_queue","time"] df3.columns = ["device_id","city_id","native_queue","time"]
print("native_device_count",df3.shape) print("native_device_count",df3.shape)
sql_nearby = "select device_id,city_id,cid from esmm_data2ffm_infer_nearby" # nearby queue
result = con_sql(sql_nearby) df2 = pd.read_csv('/home/gaoyazhe/data/nearby.csv',usecols=[0,1,2],header=0,names=['uid','city','cid_id'],sep='\t')
dct = {"uid":[],"city":[],"cid_id":[]}
for i in result:
dct["uid"].append(i[0])
dct["city"].append(i[1])
dct["cid_id"].append(i[2])
df1 = pd.read_csv("/home/gaoyazhe/data/nearby/pred.txt",sep='\t',header=None,names=["ctr","cvr"]) df1 = pd.read_csv("/home/gaoyazhe/data/nearby/pred.txt",sep='\t',header=None,names=["ctr","cvr","ctcvr"])
df2 = pd.DataFrame(dct) df2["ctr"], df2["cvr"], df2["ctcvr"] = df1["ctr"], df1["cvr"], df1["ctcvr"]
df2["ctr"],df2["cvr"] = df1["ctr"],df1["cvr"] df4 = df2.groupby(by=["uid","city"]).apply(lambda x: x.sort_values(by="ctcvr",ascending=False)).reset_index(drop=True).groupby(by=["uid","city"]).agg({'cid_id':set_join}).reset_index(drop=False)
df4 = df2.groupby(by=["uid","city"]).apply(lambda x: x.sort_values(by="cvr",ascending=False)).reset_index(drop=True).groupby(by=["uid","city"]).agg({'cid_id':set_join}).reset_index(drop=False)
df4.columns = ["device_id","city_id","nearby_queue"] df4.columns = ["device_id","city_id","nearby_queue"]
print("nearby_device_count",df4.shape) print("nearby_device_count",df4.shape)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment