import pandas as pd
import pymysql


def _connect(database):
    """Open a PyMySQL connection to ``database`` on the analysis server."""
    # NOTE(review): the credentials are hard-coded in source; consider moving
    # them to configuration or environment variables.
    return pymysql.connect(host='10.66.157.22', port=4000, user='root',
                           passwd='3SYz54LS9#^9sBvC', db=database)


def con_sql(db, sql, params=None):
    """Run ``sql`` on ``db`` and return all fetched rows as a DataFrame.

    The connection is always closed before returning, so ``db`` cannot be
    reused by the caller.

    Args:
        db: An open DB-API connection.
        sql: The statement to execute.
        params: Optional values bound to the placeholders in ``sql``. When
            omitted, ``sql`` is executed as-is.

    Returns:
        A DataFrame with one row per fetched record, or an empty DataFrame
        if executing or fetching failed (the error is printed).
    """
    cursor = db.cursor()
    try:
        if params is None:
            cursor.execute(sql)
        else:
            cursor.execute(sql, params)
        df = pd.DataFrame(list(cursor.fetchall()))
    except Exception as exc:
        # Best effort: report the actual error and hand back an empty frame.
        print("发生异常", exc)
        df = pd.DataFrame()
    finally:
        try:
            cursor.close()
        finally:
            db.close()
    return df


def _share_by_value(df, column, descending=False):
    """Return ``(value, share)`` pairs for ``column``, sorted by share.

    ``share`` is the fraction of rows whose ``column`` equals ``value``.
    Missing values never compare equal, so they are listed with share 0.0.
    """
    total = df.shape[0]
    values = df[column]
    # int() keeps the shares plain Python floats so they print as e.g. 0.5.
    shares = {
        value: int((values == value).sum()) / total
        for value in values.unique()
    }
    return sorted(shares.items(), key=lambda item: item[1],
                  reverse=descending)


def _print_level_shares(df, descending=False):
    """Print the value shares of result columns 1 and 2 with a separator."""
    print(_share_by_value(df, 1, descending))
    print("分界线")
    print(_share_by_value(df, 2, descending))


def exp():
    """Print level1/level2 shares for the diaries in a device's native queue.

    Reads the comma-separated ``native_queue`` stored for the device, looks
    the diary ids up in ``diary_feat`` and prints, in ascending order, the
    share of each ``level1_ids`` and ``level2_ids`` value.
    """
    db = _connect('jerry_test')
    try:
        cursor = db.cursor()
        try:
            sql = "select native_queue from esmm_device_diary_queue where device_id = '358035085192742'"
            cursor.execute(sql)
            row = cursor.fetchone()
        finally:
            cursor.close()
    finally:
        db.close()

    queue = row[0] if row else None
    native = tuple(queue.split(",")) if queue else ()
    print("total")
    print(len(native))
    if not native:
        # Nothing queued for this device, so there is nothing to look up.
        return

    # One placeholder per id: the driver escapes each value, and a
    # single-element queue no longer renders as the invalid "in ('x',)".
    placeholders = ", ".join(["%s"] * len(native))
    sql = ("select diary_id,level1_ids,level2_ids,level3_ids from diary_feat "
           "where diary_id in ({})".format(placeholders))
    df = con_sql(_connect('jerry_prod'), sql, native)
    if df.empty:
        # No matching diaries, or the query failed and was already reported.
        return
    _print_level_shares(df)


def click():
    """Print level1/level2 shares for the diaries a device clicked.

    Joins ``data_feed_click`` with ``diary_feat`` for the device, prints the
    number of rows and then, in descending order, the share of each
    ``level1_ids`` and ``level2_ids`` value.
    """
    sql = "select d.cid_id,f.level1_ids,f.level2_ids from data_feed_click d left join diary_feat f " \
          "on d.cid_id = f.diary_id where d.device_id = '358035085192742' " \
          "and (d.cid_type = 'diary' or d.cid_type = 'diary_video') and d.stat_date > '2018-12-20'"
    df = con_sql(_connect('jerry_prod'), sql)
    n = df.shape[0]
    print(n)
    if df.empty:
        # No clicks, or the query failed and was already reported.
        return
    _print_level_shares(df, descending=True)


def get_cid():
    """Print the device's ``cid_id`` training values as one comma-joined line."""
    sql = "select cid_id from esmm_train_data where device_id = '358035085192742' " \
          "and stat_date >= '2018-12-03'"
    df = con_sql(_connect('jerry_test'), sql)
    # An empty frame has no column 0; treat it as "no ids".
    cids = [] if df.empty else df[0].values.tolist()
    # str() so that numeric ids can be joined as well.
    print(",".join(str(cid) for cid in cids))


if __name__ == "__main__":
    from sklearn.preprocessing import MinMaxScaler

    # Load the wine data set.
    data = pd.read_csv("G:/dataset/wine.csv")
    # Select the Alcohol column.
    x = data["Alcohol"]
    # Print summary statistics for the column.
    print(x.describe())
    minMax = MinMaxScaler()
    # Min-max normalise the column; the scaler expects 2-D input, so the
    # Series is converted to a one-column frame first.
    x_std = minMax.fit_transform(x.to_frame())