Commit 4c592022 authored by Your Name

test

parent 23af1a9f
......@@ -221,14 +221,22 @@ if __name__ == "__main__":
# Input shard(s) on HDFS that are handed to predict().
te_files = ["hdfs://172.16.32.4:8020/strategy/esmm/test_nearby/part-r-00000"]
# Predict and sort.
result = predict(te_files)
df = pd.DataFrame(result, columns=["uid", "city", "cid_id", "pctcvr"])
print(df.head(10))
# Dump the raw predictions before any transformation.
df.to_csv("/home/gmuser/test.csv")
# Keep the raw columns and add trans()-converted copies next to them.
# NOTE(review): trans() is defined outside this section -- presumably it
# normalises the values for grouping/joining; confirm.
df['uid1'] = df['uid'].apply(trans)
df['city1'] = df['city'].apply(trans)
df['cid_id1'] = df['cid_id'].apply(trans)
# Within every (uid1, city1) group order the rows by pctcvr, highest first,
# then collapse each group's cid_id1 values into one field via set_join.
# NOTE(review): set_join is defined outside this section -- verify that it
# preserves the sorted order when joining.
df2 = df.groupby(by=["uid1", "city1"]).apply(lambda x: x.sort_values(by="pctcvr", ascending=False)) \
    .reset_index(drop=True).groupby(by=["uid1", "city1"]).agg({'cid_id1': set_join}).reset_index(drop=False)
df2.columns = ["device_id", "city_id", "nearby_queue"]
# The column key must be the string "time". The bare name `time` is the
# imported module (it is called as time.time() further down), which would
# create a column keyed by the module object instead of a "time" column.
df2["time"] = "2019-06-25"
# Update or insert: settings for the database that receives the queue rows.
# The pymysql.connect() call further down passes these same values as literals.
host = '172.16.40.158'
port = 4000
user = 'root'
......@@ -236,7 +244,14 @@ if __name__ == "__main__":
db = 'jerry_test'
charset = 'utf8'
# NOTE(review): device_count is no longer needed by the loop below; it is kept
# in case code outside this section reads it -- confirm and drop if unused.
device_count = df2.shape[0]
# Upsert statement, built once because it is identical for every row.
# It has eight placeholders: four for the INSERT values and the same four
# repeated for the ON DUPLICATE KEY UPDATE clause.
query = "INSERT INTO esmm_device_diary_queue_test (device_id, city_id, time,nearby_queue) VALUES(%s, %s, %s, %s) ON DUPLICATE KEY UPDATE device_id=%s, city_id=%s, time=%s, nearby_queue=%s"
# NOTE(review): credentials are hard-coded in source; they should come from
# configuration or the environment rather than being committed.
con = pymysql.connect(host='172.16.40.158', port=4000, user='root', passwd='3SYz54LS9#^9sBvC',db='jerry_test')
try:
    with con.cursor() as cur:
        # nearby_queue lives on df2 (the aggregated frame), not on df, so all
        # four values are read from df2.
        for row in zip(df2["device_id"], df2["city_id"], df2["time"], df2["nearby_queue"]):
            cur.execute(query, row + row)
    # Commit once after all rows have been sent.
    con.commit()
finally:
    # Always release the connection, even when an execute() fails.
    con.close()
# Report the elapsed wall-clock time in minutes, measured from `b`.
# NOTE(review): `b` is set outside this section -- presumably a time.time()
# timestamp taken at start-up; confirm.
print("耗时(min):")
print((time.time()-b)/60)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment