Commit fbb3fd1c authored by 张彦钊

add print in ctr

parent f0b5b9e9
...@@ -64,6 +64,7 @@ def get_tail6(): ...@@ -64,6 +64,7 @@ def get_tail6():
print("成功获取") print("成功获取")
result = cursor.fetchall() result = cursor.fetchall()
db.close() db.close()
print(pd.DataFrame(list(result)).empty)
user = pd.DataFrame(list(result))[0].values.tolist() user = pd.DataFrame(list(result))[0].values.tolist()
user = tuple(user) user = tuple(user)
print("用户个数") print("用户个数")
......
...@@ -165,65 +165,67 @@ class multiFFMFormatPandas: ...@@ -165,65 +165,67 @@ class multiFFMFormatPandas:
return False return False
# ffm 格式转换函数、类 # ffm 格式转换函数、类
# class FFMFormatPandas: class FFMFormatPandas:
# def __init__(self): def __init__(self):
# self.field_index_ = None self.field_index_ = None
# self.feature_index_ = None self.feature_index_ = None
# self.y = None self.y = None
#
# def fit(self, df, y=None): def fit(self, df, y=None):
# self.y = y self.y = y
# df_ffm = df[df.columns.difference([self.y])] df_ffm = df[df.columns.difference([self.y])]
# if self.field_index_ is None: if self.field_index_ is None:
# self.field_index_ = {col: i for i, col in enumerate(df_ffm)} # 除了y,每列列名加索引对应的字典,例如field_index = {name:0,age:1}
# self.field_index_ = {col: i for i, col in enumerate(df_ffm)}
# if self.feature_index_ is not None:
# last_idx = max(list(self.feature_index_.values())) if self.feature_index_ is not None:
# last_idx = max(list(self.feature_index_.values()))
# if self.feature_index_ is None:
# self.feature_index_ = dict() if self.feature_index_ is None:
# last_idx = 0 self.feature_index_ = dict()
# last_idx = 0
# for col in df.columns:
# vals = df[col].unique() for col in df.columns:
# for val in vals: vals = df[col].unique()
# if pd.isnull(val): for val in vals:
# continue if pd.isnull(val):
# name = '{}_{}'.format(col, val) continue
# if name not in self.feature_index_: name = '{}_{}'.format(col, val)
# self.feature_index_[name] = last_idx if name not in self.feature_index_:
# last_idx += 1 # feature_index = {name_tom :0,name_lily :1,name:2,age_18:3,age_19:4:age:5}
# self.feature_index_[col] = last_idx self.feature_index_[name] = last_idx
# last_idx += 1 last_idx += 1
# return self self.feature_index_[col] = last_idx
# last_idx += 1
# def fit_transform(self, df, y=None): return self
# self.fit(df, y)
# return self.transform(df) def fit_transform(self, df, y=None):
# self.fit(df, y)
# def transform_row_(self, row, t): return self.transform(df)
# ffm = []
# if self.y is not None: def transform_row_(self, row, t):
# ffm.append(str(row.loc[row.index == self.y][0])) ffm = []
# if self.y is None: if self.y is not None:
# ffm.append(str(0)) ffm.append(str(row.loc[row.index == self.y][0]))
# if self.y is None:
# for col, val in row.loc[row.index != self.y].to_dict().items(): ffm.append(str(0))
# col_type = t[col]
# name = '{}_{}'.format(col, val) for col, val in row.loc[row.index != self.y].to_dict().items():
# if col_type.kind == 'O': col_type = t[col]
# ffm.append('{}:{}:1'.format(self.field_index_[col], self.feature_index_[name])) name = '{}_{}'.format(col, val)
# elif col_type.kind == 'i': if col_type.kind == 'O':
# ffm.append('{}:{}:{}'.format(self.field_index_[col], self.feature_index_[col], val)) ffm.append('{}:{}:1'.format(self.field_index_[col], self.feature_index_[name]))
# return ' '.join(ffm) elif col_type.kind == 'i':
# ffm.append('{}:{}:{}'.format(self.field_index_[col], self.feature_index_[col], val))
# def transform(self, df): return ' '.join(ffm)
# t = df.dtypes.to_dict()
# return pd.Series({idx: self.transform_row_(row, t) for idx, row in df.iterrows()}) def transform(self, df):
# t = df.dtypes.to_dict()
# # 下面这个方法不是这个类原有的方法,是新增的。目的是用来判断这个用户是不是在训练数据集中存在 return pd.Series({idx: self.transform_row_(row, t) for idx, row in df.iterrows()})
# def is_feature_index_exist(self, name):
# if name in self.feature_index_: # 下面这个方法不是这个类原有的方法,是新增的。目的是用来判断这个用户是不是在训练数据集中存在
# return True def is_feature_index_exist(self, name):
# else: if name in self.feature_index_:
# return False return True
else:
return False
...@@ -6,7 +6,7 @@ import pymysql ...@@ -6,7 +6,7 @@ import pymysql
def fetch_data(start_date, end_date): def fetch_data(start_date, end_date):
# 获取点击表里的device_id # 获取点击表里的device_id
db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test') db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_prod')
sql = "select distinct device_id from data_feed_click" sql = "select distinct device_id from data_feed_click"
click_device_id = con_sql(db,sql)[0].values.tolist() click_device_id = con_sql(db,sql)[0].values.tolist()
print("成功获取点击表里的device_id") print("成功获取点击表里的device_id")
...@@ -14,7 +14,7 @@ def fetch_data(start_date, end_date): ...@@ -14,7 +14,7 @@ def fetch_data(start_date, end_date):
sql = "select cid,device_id,time,stat_date from data_feed_click " \ sql = "select cid,device_id,time,stat_date from data_feed_click " \
"where stat_date >= '{0}' and stat_date <= '{1}'".format(start_date, end_date) "where stat_date >= '{0}' and stat_date <= '{1}'".format(start_date, end_date)
# 因为上面的db已经关了,需要再写一遍 # 因为上面的db已经关了,需要再写一遍
db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test') db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_prod')
click = con_sql(db,sql) click = con_sql(db,sql)
click = click.rename(columns={0: "cid", 1: "device_id", 2: "time_date", 3: "stat_date"}) click = click.rename(columns={0: "cid", 1: "device_id", 2: "time_date", 3: "stat_date"})
print("成功获取点击表里的数据") print("成功获取点击表里的数据")
...@@ -28,7 +28,7 @@ def fetch_data(start_date, end_date): ...@@ -28,7 +28,7 @@ def fetch_data(start_date, end_date):
"where stat_date >= '{0}' and stat_date <= '{1}'".format(start_date, end_date) "where stat_date >= '{0}' and stat_date <= '{1}'".format(start_date, end_date)
start = time.time() start = time.time()
# 因为上面的db已经关了,需要再写一遍 # 因为上面的db已经关了,需要再写一遍
db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test') db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_prod')
exposure = con_sql(db,sql) exposure = con_sql(db,sql)
end = time.time() end = time.time()
print("获取曝光表耗时{}分".format((end-start)/60)) print("获取曝光表耗时{}分".format((end-start)/60))
......
...@@ -283,7 +283,7 @@ class multiFFMFormatPandas: ...@@ -283,7 +283,7 @@ class multiFFMFormatPandas:
# t = df.dtypes.to_dict() # t = df.dtypes.to_dict()
# return pd.Series({idx: self.transform_row_(row, t) for idx, row in df.iterrows()}) # return pd.Series({idx: self.transform_row_(row, t) for idx, row in df.iterrows()})
# #
# # 下面这个方法不是这个类原有的方法,是新增的。目的是用来判断这个用户是不是在训练数据集中存在 # 下面这个方法不是这个类原有的方法,是新增的。目的是用来判断这个用户是不是在训练数据集中存在
# def is_feature_index_exist(self, name): # def is_feature_index_exist(self, name):
# if name in self.feature_index_: # if name in self.feature_index_:
# return True # return True
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment