to_database.py 3.25 KB
Newer Older
高雅喆's avatar
高雅喆 committed
1 2
#coding=utf-8

高雅喆's avatar
高雅喆 committed
3 4 5
from sqlalchemy import create_engine
import pandas as pd
import pymysql
6
import datetime
高雅喆's avatar
高雅喆 committed
7

张彦钊's avatar
张彦钊 committed
8

高雅喆's avatar
高雅喆 committed
9 10 11 12 13
def con_sql(sql):
    """
    :type sql : str
    :rtype : tuple
    """
14
    db = pymysql.connect(host='172.16.40.158', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test')
高雅喆's avatar
高雅喆 committed
15 16 17 18 19 20
    cursor = db.cursor()
    cursor.execute(sql)
    result = cursor.fetchall()
    db.close()
    return result

21

22
def set_join(lst):
23 24
    l = lst.unique().tolist()
    r = [str(i) for i in l]
25
    r =r[:500]
26 27
    return ','.join(r)

28

高雅喆's avatar
高雅喆 committed
29
def main():
高雅喆's avatar
高雅喆 committed
30 31

    # native queue
张彦钊's avatar
张彦钊 committed
32
    df2 = pd.read_csv(path+'/native.csv')
高雅喆's avatar
高雅喆 committed
33
    df2['cid_id'] = df2['cid_id'].astype(str)
高雅喆's avatar
高雅喆 committed
34

张彦钊's avatar
张彦钊 committed
35
    df1 = pd.read_csv(path+"/native/pred.txt",sep='\t',header=None,names=["ctr","cvr","ctcvr"])
高雅喆's avatar
高雅喆 committed
36
    df2["ctr"],df2["cvr"],df2["ctcvr"] = df1["ctr"],df1["cvr"],df1["ctcvr"]
张彦钊's avatar
张彦钊 committed
37
    df3 = df2.groupby(by=["uid","city"]).apply(lambda x: x.sort_values(by="ctcvr",ascending=False))\
38
        .reset_index(drop=True).groupby(by=["uid","city"]).agg({'cid_id':set_join}).reset_index(drop=False)
高雅喆's avatar
高雅喆 committed
39
    df3.columns = ["device_id","city_id","native_queue"]
高雅喆's avatar
高雅喆 committed
40 41 42
    print("native_device_count",df3.shape)


高雅喆's avatar
高雅喆 committed
43
    # nearby queue
张彦钊's avatar
张彦钊 committed
44
    df2 = pd.read_csv(path+'/nearby.csv')
高雅喆's avatar
高雅喆 committed
45
    df2['cid_id'] = df2['cid_id'].astype(str)
高雅喆's avatar
高雅喆 committed
46

张彦钊's avatar
张彦钊 committed
47
    df1 = pd.read_csv(path+"/nearby/pred.txt",sep='\t',header=None,names=["ctr","cvr","ctcvr"])
高雅喆's avatar
高雅喆 committed
48
    df2["ctr"], df2["cvr"], df2["ctcvr"] = df1["ctr"], df1["cvr"], df1["ctcvr"]
49 50
    df4 = df2.groupby(by=["uid","city"]).apply(lambda x: x.sort_values(by="ctcvr",ascending=False))\
        .reset_index(drop=True).groupby(by=["uid","city"]).agg({'cid_id':set_join}).reset_index(drop=False)
高雅喆's avatar
高雅喆 committed
51 52 53 54 55
    df4.columns = ["device_id","city_id","nearby_queue"]
    print("nearby_device_count",df4.shape)

    #union
    df_all = pd.merge(df3,df4,on=['device_id','city_id'],how='outer').fillna("")
高雅喆's avatar
高雅喆 committed
56 57
    df_all['device_id'] = df_all['device_id'].astype(str)
    df_all['city_id'] = df_all['city_id'].astype(str)
58
    df_all["time"] = str(datetime.datetime.now().strftime('%Y%m%d%H%M'))
高雅喆's avatar
高雅喆 committed
59 60
    print("union_device_count",df_all.shape)

61
    host='172.16.40.158'
高雅喆's avatar
高雅喆 committed
62 63 64 65 66 67
    port=4000
    user='root'
    password='3SYz54LS9#^9sBvC'
    db='jerry_test'
    charset='utf8'

张彦钊's avatar
张彦钊 committed
68
    df_merge = df_all['device_id'] + df_all['city_id']
张彦钊's avatar
张彦钊 committed
69 70 71 72 73 74 75 76
    to_delete = list(df_merge.values)
    total = len(to_delete)
    df_merge_str = [str(to_delete[:int(total/5)]).strip('[]')]
    for i in range(2,6):
        start = int(total*(i -1)/5)
        end = int(total*i/5)
        tmp = str(to_delete[start:end]).strip('[]')
        df_merge_str.append(tmp)
张彦钊's avatar
张彦钊 committed
77 78

    try:
张彦钊's avatar
张彦钊 committed
79 80 81 82 83 84 85 86
        for i in df_merge_str:
            delete_str = 'delete from esmm_device_diary_queue where concat(device_id,city_id) in ({0})'.format(i)
            con = pymysql.connect(host='172.16.40.158', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test')
            cur = con.cursor()
            cur.execute(delete_str)
            con.commit()
            print("delete done")
            con.close()
张彦钊's avatar
张彦钊 committed
87 88
        engine = create_engine(str(r"mysql+pymysql://%s:" + '%s' + "@%s:%s/%s") % (user, password, host, port, db))
        df_all.to_sql('esmm_device_diary_queue',con=engine,if_exists='append',index=False,chunksize=8000)
89 90
        print("insert done")

张彦钊's avatar
张彦钊 committed
91 92
    except Exception as e:
        print(e)
张彦钊's avatar
张彦钊 committed
93

张彦钊's avatar
张彦钊 committed
94

高雅喆's avatar
高雅喆 committed
95
if __name__ == '__main__':
张彦钊's avatar
张彦钊 committed
96
    path = "/home/gmuser/esmm"
高雅喆's avatar
高雅喆 committed
97
    main()