eda.py 1.32 KB
Newer Older
张彦钊's avatar
张彦钊 committed
1 2 3 4 5 6 7
import pymysql
import pandas as pd
from multiprocessing import Pool
import numpy as np
import datetime
import time

张彦钊's avatar
张彦钊 committed
8 9

def con_sql(db, sql):
    """Execute ``sql`` on ``db`` and return the first column of the first row.

    Args:
        db: An open DB-API style connection, i.e. an object exposing
            ``cursor()`` (the disabled driver in this file passes a
            ``pymysql`` connection).
        sql: The complete SQL statement to run. It is handed to the driver
            unchanged, so it must already be safe to execute.

    Returns:
        The first column of the first result row, or ``None`` when the
        statement produced no rows.
    """
    cursor = db.cursor()
    try:
        cursor.execute(sql)
        row = cursor.fetchone()
    finally:
        # Release the cursor even when execute()/fetchone() raises.
        cursor.close()

    # fetchone() yields None for an empty result set; indexing it directly
    # would fail with an unhelpful TypeError.
    if row is None:
        return None
    return row[0]
张彦钊's avatar
张彦钊 committed
15 16


张彦钊's avatar
张彦钊 committed
17 18 19 20 21 22 23 24 25 26 27 28
# def test(days):
#     start = (temp - datetime.timedelta(days)).strftime("%Y-%m-%d")
#     print(start)
#     sql = "select (select count(*) from esmm_train_data where stat_date = '{}' and y = 0)/(select count(*) " \
#           "from train_data where stat_date = '{}' and z = 1)".format(start,start)
#     db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test')
#     exp = con_sql(db, sql)
#     print(exp)
#     sql = "select (select count(*) from train_data where stat_date = '{}' and y = 1 and z = 0)/(select count(*) " \
#           "from train_data where stat_date = '{}' and z = 1)".format(start,start)
#     click = con_sql(db, sql)
#     return start,exp,click
张彦钊's avatar
张彦钊 committed
29 30 31


if __name__ == "__main__":
    # The one-off export below is disabled. It called the (also disabled)
    # ``test`` helper for day offsets 1..40 and appended one CSV line per
    # offset to ``output_path``.
    # temp = datetime.datetime.strptime("2019-03-14", "%Y-%m-%d")
    # DIRECTORY_PATH = "/home/gmuser/"
    # output_path = DIRECTORY_PATH + "esmm_train_eda.csv"
    # for i in range(1,41):
    #     a,b,c = test(i)
    #     with open(output_path, 'a+') as f:
    #         line = str(a) + ',' + str(b)+ ',' + str(c) + '\n'
    #         f.write(line)
    # An ``if`` suite made only of comments is a syntax error; keep the
    # module importable and runnable while the driver stays disabled.
    pass


张彦钊's avatar
张彦钊 committed
42