#coding=utf-8

from sqlalchemy import create_engine
import pandas as pd
import pymysql
import MySQLdb
import time

def con_sql(sql):
    """
    Execute one SQL statement on the ``jerry_test`` database and return
    every row of its result set.

    The connection and cursor are always released, even when the statement
    fails; the exception itself is propagated to the caller.

    :type sql : str
    :rtype : tuple
    """
    # NOTE(review): the credentials are hard-coded here; consider loading
    # them from configuration or the environment instead of source control.
    db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test')
    try:
        with db.cursor() as cursor:
            cursor.execute(sql)
            return cursor.fetchall()
    finally:
        # Close in ``finally`` so a failing query does not leak the connection.
        db.close()

def set_join(lst):
    """
    Join the distinct values of a pandas Series into one comma-separated
    string.

    Duplicates are removed with ``Series.unique()``, which keeps values in
    order of first appearance, so a Series that is already ranked stays
    ranked. Every value is converted with ``str`` before joining.

    :type lst : pandas.Series
    :rtype : str
    """
    return ','.join(str(item) for item in lst.unique().tolist())

def _load_ranked_queue(candidates_path, predictions_path, queue_column):
    """
    Build one ranked diary queue per (device, city) from a candidate file
    and its prediction file.

    The candidate file is tab-separated with a header row; only its first
    three columns are read (as uid, city, cid_id). The prediction file is
    tab-separated without a header and holds ctr, cvr and ctcvr. Predictions
    are attached to candidates by row position.

    :type candidates_path : str
    :type predictions_path : str
    :type queue_column : str
    :rtype : pandas.DataFrame  (columns: device_id, city_id, <queue_column>)
    """
    candidates = pd.read_csv(candidates_path, usecols=[0, 1, 2], header=0,
                             names=['uid', 'city', 'cid_id'], sep='\t')
    candidates['cid_id'] = candidates['cid_id'].astype(str)

    predictions = pd.read_csv(predictions_path, sep='\t', header=None,
                              names=["ctr", "cvr", "ctcvr"])
    # NOTE(review): assignment aligns on the default integer index, so this
    # assumes both files list the same rows in the same order.
    for column in ("ctr", "cvr", "ctcvr"):
        candidates[column] = predictions[column]

    # One global, stable sort is enough: groupby keeps the row order inside
    # each group, so every group's cid_id values arrive at set_join already
    # ordered by descending ctcvr. This replaces a per-group apply/sort.
    ranked = candidates.sort_values(by="ctcvr", ascending=False, kind="mergesort")
    queue = (ranked.groupby(by=["uid", "city"])
             .agg({'cid_id': set_join})
             .reset_index(drop=False))
    queue.columns = ["device_id", "city_id", queue_column]
    return queue


def _delete_existing_rows(connection, keys):
    """
    Delete and commit the rows of esmm_device_diary_queue whose
    concat(device_id, city_id) is one of ``keys``.

    The keys are bound as query parameters, so the driver escapes them
    instead of them being pasted into the SQL text.

    :type connection : pymysql.connections.Connection
    :type keys : list
    """
    placeholders = ','.join(['%s'] * len(keys))
    statement = 'delete from esmm_device_diary_queue where concat(device_id,city_id) in ({0})'.format(placeholders)
    with connection.cursor() as cursor:
        cursor.execute(statement, keys)
    connection.commit()


def main():
    """
    Rebuild the native and nearby diary queues from the ESMM prediction
    files and store them in the ``esmm_device_diary_queue`` table.

    Existing rows for the (device, city) pairs being written are deleted
    first, then the new rows are appended. Database errors are printed
    rather than raised.
    """
    # native queue
    native_queue = _load_ranked_queue('/home/gmuser/esmm_data/native.csv',
                                      "/home/gmuser/esmm_data/native/pred.txt",
                                      "native_queue")
    print("native_device_count", native_queue.shape)

    # nearby queue
    nearby_queue = _load_ranked_queue('/home/gmuser/esmm_data/nearby.csv',
                                      "/home/gmuser/esmm_data/nearby/pred.txt",
                                      "nearby_queue")
    print("nearby_device_count", nearby_queue.shape)

    # union: a device/city present in only one queue gets "" for the other
    df_all = pd.merge(native_queue, nearby_queue, on=['device_id', 'city_id'], how='outer').fillna("")
    df_all['device_id'] = df_all['device_id'].astype(str)
    df_all['city_id'] = df_all['city_id'].astype(str)
    df_all["time"] = int(time.time())
    print("union_device_count", df_all.shape)

    if df_all.empty:
        # An empty key list would produce the invalid SQL "in ()".
        print("no rows to write, skipping database update")
        return

    # NOTE(review): the credentials are hard-coded here; consider loading
    # them from configuration or the environment instead of source control.
    host = '10.66.157.22'
    port = 4000
    user = 'root'
    password = '3SYz54LS9#^9sBvC'
    db = 'jerry_test'

    engine = create_engine("mysql+mysqldb://%s:%s@%s:%s/%s" % (user, password, host, port, db))

    try:
        keys = (df_all['device_id'] + df_all['city_id']).unique().tolist()
        con = pymysql.connect(host=host, port=port, user=user, passwd=password, db=db)
        try:
            _delete_existing_rows(con, keys)
        finally:
            con.close()
        # NOTE(review): the delete is committed before the insert runs, so a
        # failed insert leaves these devices without rows until the next run.
        df_all.to_sql('esmm_device_diary_queue', con=engine, if_exists='append', index=False)
    except Exception as e:
        print(e)


# Run the export only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()