Commit ae16b6d6 authored by 张彦钊

add eda file

parent bbcd3bfa
import pymysql
import pandas as pd
from multiprocessing import Pool
import numpy as np
import datetime
import time
def con_sql(db, sql):
    """Run ``sql`` on ``db`` and return all fetched rows as a DataFrame.

    The connection is closed before returning in every case, so callers
    must open a fresh connection for each call.

    Args:
        db: An open connection object providing ``cursor()`` and ``close()``.
        sql: The SQL statement to execute.

    Returns:
        A ``pandas.DataFrame`` built from the fetched rows, or an empty
        DataFrame when creating the cursor, executing, or fetching raised.
    """
    try:
        # Creating the cursor inside the try guarantees db.close() still
        # runs if cursor creation itself fails.
        cursor = db.cursor()
        cursor.execute(sql)
        result = cursor.fetchall()
        df = pd.DataFrame(list(result))
    except Exception as e:
        # Print the caught instance (not the Exception class) so the
        # actual failure reason is visible.
        print("发生异常", e)
        df = pd.DataFrame()
    finally:
        db.close()
    return df
def test(days):
    """Query two count ratios from ``train_data`` for one stat_date.

    The date is the module-level ``temp`` datetime minus ``days`` days;
    ``temp`` is assigned in the ``__main__`` block of this script.

    Args:
        days: Number of days to subtract from ``temp``.

    Returns:
        A tuple ``(start, exp, click)`` where ``start`` is the date as a
        ``YYYY-MM-DD`` string, ``exp`` is count(y = 0) / count(z = 1) and
        ``click`` is count(y = 1 and z = 0) / count(z = 1) for that date.
    """
    start = (temp - datetime.timedelta(days)).strftime("%Y-%m-%d")
    print(start)

    # NOTE(review): credentials are hard-coded here; consider loading them
    # from configuration or the environment instead.
    def _connect():
        # con_sql closes the connection it is given, so each query needs
        # a freshly opened one.
        return pymysql.connect(host='10.66.157.22', port=4000, user='root',
                               passwd='3SYz54LS9#^9sBvC', db='jerry_test')

    # Each statement contains two '{}' placeholders (one per sub-select),
    # so the date has to be passed twice; a single argument raises
    # IndexError before the query is ever sent.
    sql = "select (select count(*) from train_data where stat_date = '{}' and y = 0)/(select count(*) " \
          "from train_data where stat_date = '{}' and z = 1)".format(start, start)
    # Column 0, first row: the single ratio value returned by the query.
    exp = con_sql(_connect(), sql)[0].values.tolist()[0]

    sql = "select (select count(*) from train_data where stat_date = '{}' and y = 1 and z = 0)/(select count(*) " \
          "from train_data where stat_date = '{}' and z = 1)".format(start, start)
    click = con_sql(_connect(), sql)[0].values.tolist()[0]
    return start, exp, click
if __name__ == "__main__":
    # Reference date that test() counts back from (read there as a global).
    temp = datetime.datetime.strptime("2019-03-14", "%Y-%m-%d")
    DIRECTORY_PATH = "/home/gmuser/"
    output_path = DIRECTORY_PATH + "eda.csv"
    # One CSV row per day offset 1..40, appended as soon as it is computed.
    for offset in range(1, 41):
        day, exp_ratio, click_ratio = test(offset)
        row = ",".join([str(day), str(exp_ratio), str(click_ratio)]) + "\n"
        with open(output_path, 'a+') as out:
            out.write(row)
......@@ -79,7 +79,7 @@ def get_data():
value_map = dict(zip(unique_values,temp))
df = df.drop("device_id", axis=1)
train = df
train = df[df["stat_date"] != validate_date+"stat_date"]
test = df[df["stat_date"] == validate_date+"stat_date"]
for i in ["ucity_id", "clevel1_id", "ccity_name", "device_type", "manufacturer",
"channel", "top", "l1", "time", "stat_date","l2"]:
......
......@@ -9,18 +9,7 @@ import time
from sqlalchemy import create_engine
def con_sql(db, sql):
    """Run ``sql`` on ``db`` and return all fetched rows as a DataFrame.

    The connection is closed before returning in every case, so callers
    must open a fresh connection for each call.

    Args:
        db: An open connection object providing ``cursor()`` and ``close()``.
        sql: The SQL statement to execute.

    Returns:
        A ``pandas.DataFrame`` built from the fetched rows, or an empty
        DataFrame when creating the cursor, executing, or fetching raised.
    """
    try:
        # Creating the cursor inside the try guarantees db.close() still
        # runs if cursor creation itself fails.
        cursor = db.cursor()
        cursor.execute(sql)
        result = cursor.fetchall()
        df = pd.DataFrame(list(result))
    except Exception as e:
        # Print the caught instance (not the Exception class) so the
        # actual failure reason is visible.
        print("发生异常", e)
        df = pd.DataFrame()
    finally:
        db.close()
    return df
# def test():
# sql = "select max(update_time) from ffm_diary_queue"
......@@ -285,6 +274,35 @@ def get_predict_set(ucity_id,model,ccity_name,manufacturer,channel):
# print("nearby_pre shape")
# print(nearby_pre.shape)
def con_sql(db, sql):
    """Run ``sql`` on ``db`` and return all fetched rows as a DataFrame.

    The connection is closed before returning in every case, so callers
    must open a fresh connection for each call.

    Args:
        db: An open connection object providing ``cursor()`` and ``close()``.
        sql: The SQL statement to execute.

    Returns:
        A ``pandas.DataFrame`` built from the fetched rows, or an empty
        DataFrame when creating the cursor, executing, or fetching raised.
    """
    try:
        # Creating the cursor inside the try guarantees db.close() still
        # runs if cursor creation itself fails.
        cursor = db.cursor()
        cursor.execute(sql)
        result = cursor.fetchall()
        df = pd.DataFrame(list(result))
    except Exception as e:
        # Print the caught instance (not the Exception class) so the
        # actual failure reason is visible.
        print("发生异常", e)
        df = pd.DataFrame()
    finally:
        db.close()
    return df
def test(days):
    """Query two count ratios from ``train_data`` for one stat_date.

    The date is the module-level ``temp`` datetime minus ``days`` days;
    ``temp`` is expected to be assigned by the script's ``__main__`` block.

    Args:
        days: Number of days to subtract from ``temp``.

    Returns:
        A tuple ``(start, exp, click)`` where ``start`` is the date as a
        ``YYYY-MM-DD`` string, ``exp`` is count(y = 0) / count(z = 1) and
        ``click`` is count(y = 1 and z = 0) / count(z = 1) for that date.
    """
    start = (temp - datetime.timedelta(days)).strftime("%Y-%m-%d")
    print(start)

    # NOTE(review): credentials are hard-coded here; consider loading them
    # from configuration or the environment instead.
    def _connect():
        # con_sql closes the connection it is given, so each query needs
        # a freshly opened one.
        return pymysql.connect(host='10.66.157.22', port=4000, user='root',
                               passwd='3SYz54LS9#^9sBvC', db='jerry_test')

    # Each statement contains two '{}' placeholders (one per sub-select),
    # so the date has to be passed twice; a single argument raises
    # IndexError before the query is ever sent.
    sql = "select (select count(*) from train_data where stat_date = '{}' and y = 0)/(select count(*) " \
          "from train_data where stat_date = '{}' and z = 1)".format(start, start)
    # Column 0, first row: the single ratio value returned by the query.
    exp = con_sql(_connect(), sql)[0].values.tolist()[0]

    sql = "select (select count(*) from train_data where stat_date = '{}' and y = 1 and z = 0)/(select count(*) " \
          "from train_data where stat_date = '{}' and z = 1)".format(start, start)
    click = con_sql(_connect(), sql)[0].values.tolist()[0]
    return start, exp, click
if __name__ == "__main__":
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment