to_database.py 3.18 KB
Newer Older
高雅喆's avatar
高雅喆 committed
1 2
#coding=utf-8

高雅喆's avatar
高雅喆 committed
3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
from sqlalchemy import create_engine
import pandas as pd
import pymysql
import time

def con_sql(sql):
    """
    :type sql : str
    :rtype : tuple
    """
    db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test')
    cursor = db.cursor()
    cursor.execute(sql)
    result = cursor.fetchall()
    db.close()
    return result

20

21 22 23 24 25 26 27 28 29 30 31 32
def nearby_set_join(lst):
    # return ','.join([str(i) for i in list(lst)])
    return ','.join([str(i) for i in lst.unique().tolist()])


def native_set_join(lst):
    l = lst.unique().tolist()
    d = int(len(l)/2)
    if d == 0:
        d = 1
    r = [str(i) for i in l]
    r =r[:d]
33 34
    return ','.join(r)

35

高雅喆's avatar
高雅喆 committed
36
def main():
高雅喆's avatar
高雅喆 committed
37 38

    # native queue
39
    df2 = pd.read_csv('/home/gmuser/esmm_data/native.csv')
高雅喆's avatar
高雅喆 committed
40
    df2['cid_id'] = df2['cid_id'].astype(str)
高雅喆's avatar
高雅喆 committed
41

42
    df1 = pd.read_csv("/home/gmuser/esmm_data/native/pred.txt",sep='\t',header=None,names=["ctr","cvr","ctcvr"])
高雅喆's avatar
高雅喆 committed
43
    df2["ctr"],df2["cvr"],df2["ctcvr"] = df1["ctr"],df1["cvr"],df1["ctcvr"]
44
    df3 = df2.groupby(by=["uid","city"]).apply(lambda x: x.sort_values(by="ctcvr",ascending=False)).reset_index(drop=True).groupby(by=["uid","city"]).agg({'cid_id':native_set_join}).reset_index(drop=False)
高雅喆's avatar
高雅喆 committed
45
    df3.columns = ["device_id","city_id","native_queue"]
高雅喆's avatar
高雅喆 committed
46 47 48
    print("native_device_count",df3.shape)


高雅喆's avatar
高雅喆 committed
49
    # nearby queue
50
    df2 = pd.read_csv('/home/gmuser/esmm_data/nearby.csv')
高雅喆's avatar
高雅喆 committed
51
    df2['cid_id'] = df2['cid_id'].astype(str)
高雅喆's avatar
高雅喆 committed
52

53
    df1 = pd.read_csv("/home/gmuser/esmm_data/nearby/pred.txt",sep='\t',header=None,names=["ctr","cvr","ctcvr"])
高雅喆's avatar
高雅喆 committed
54
    df2["ctr"], df2["cvr"], df2["ctcvr"] = df1["ctr"], df1["cvr"], df1["ctcvr"]
55
    df4 = df2.groupby(by=["uid","city"]).apply(lambda x: x.sort_values(by="ctcvr",ascending=False)).reset_index(drop=True).groupby(by=["uid","city"]).agg({'cid_id':nearby_set_join}).reset_index(drop=False)
高雅喆's avatar
高雅喆 committed
56 57 58 59 60
    df4.columns = ["device_id","city_id","nearby_queue"]
    print("nearby_device_count",df4.shape)

    #union
    df_all = pd.merge(df3,df4,on=['device_id','city_id'],how='outer').fillna("")
高雅喆's avatar
高雅喆 committed
61 62
    df_all['device_id'] = df_all['device_id'].astype(str)
    df_all['city_id'] = df_all['city_id'].astype(str)
高雅喆's avatar
高雅喆 committed
63 64
    ctime = int(time.time())
    df_all["time"] = ctime
高雅喆's avatar
高雅喆 committed
65 66 67 68 69 70 71 72 73 74 75 76 77 78
    print("union_device_count",df_all.shape)



    host='10.66.157.22'
    port=4000
    user='root'
    password='3SYz54LS9#^9sBvC'
    db='jerry_test'
    charset='utf8'

    engine = create_engine(str(r"mysql+mysqldb://%s:" + '%s' + "@%s:%s/%s") % (user, password, host, port, db))

    try:
高雅喆's avatar
高雅喆 committed
79 80
        # df_merge = df_all[['device_id','city_id']].apply(lambda x: ''.join(x),axis=1)
        df_merge = df_all['device_id'] + df_all['city_id']
81 82 83 84 85 86
        df_merge_str = (str(list(df_merge.values))).strip('[]')
        delete_str = 'delete from esmm_device_diary_queue where concat(device_id,city_id) in ({0})'.format(df_merge_str)
        con = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test')
        cur = con.cursor()
        cur.execute(delete_str)
        con.commit()
87
        df_all.to_sql('esmm_device_diary_queue',con=engine,if_exists='append',index=False,chunksize=8000)
高雅喆's avatar
高雅喆 committed
88 89 90 91 92 93 94
    except Exception as e:
        print(e)



if __name__ == '__main__':
    main()