Commit 8b0682b3 authored by 张彦钊

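Fix bugs: assign the result of exposure.append(click) back to exposure; train on data.csv with lr=0.05, and disable the dataset split, validation, and prediction steps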

parent 6b6f8d78
...@@ -32,8 +32,8 @@ exposure = exposure.rename(columns={0:"cid",1:"device_id",2:"time"}) ...@@ -32,8 +32,8 @@ exposure = exposure.rename(columns={0:"cid",1:"device_id",2:"time"})
print("成功获取曝光表里的数据") print("成功获取曝光表里的数据")
# 求曝光表和点击表的差集合 # 求曝光表和点击表的差集合
exposure.append(click) exposure = exposure.append(click)
exposure.append(click) exposure = exposure.append(click)
subset = click.columns.tolist() subset = click.columns.tolist()
exposure = exposure.drop_duplicates(subset=subset,keep=False) exposure = exposure.drop_duplicates(subset=subset,keep=False)
print("成功完成曝光表和点击表的差集合") print("成功完成曝光表和点击表的差集合")
...@@ -66,6 +66,8 @@ print(data.head(2)) ...@@ -66,6 +66,8 @@ print(data.head(2))
data = shuffle(data) data = shuffle(data)
print("start ffm transform") print("start ffm transform")
# ffm 格式转换函数、类 # ffm 格式转换函数、类
class FFMFormatPandas: class FFMFormatPandas:
def __init__(self): def __init__(self):
...@@ -123,6 +125,7 @@ class FFMFormatPandas: ...@@ -123,6 +125,7 @@ class FFMFormatPandas:
t = df.dtypes.to_dict() t = df.dtypes.to_dict()
return pd.Series({idx: self.transform_row_(row, t) for idx, row in df.iterrows()}) return pd.Series({idx: self.transform_row_(row, t) for idx, row in df.iterrows()})
ffm_train = FFMFormatPandas() ffm_train = FFMFormatPandas()
data = ffm_train.fit_transform(data, y='y') data = ffm_train.fit_transform(data, y='y')
print("done transform ffm") print("done transform ffm")
...@@ -131,6 +134,8 @@ data = pd.read_csv("/home/zhangyanzhao/data.csv",header=None) ...@@ -131,6 +134,8 @@ data = pd.read_csv("/home/zhangyanzhao/data.csv",header=None)
print("数据集大小") print("数据集大小")
print(data.shape) print(data.shape)
print(data.head(2)) print(data.head(2))
'''
n = np.rint(data.shape[0]/8) n = np.rint(data.shape[0]/8)
m = np.rint(data.shape[0]*(3/8)) m = np.rint(data.shape[0]*(3/8))
# 1/8的数据集用来做测试集 # 1/8的数据集用来做测试集
...@@ -148,19 +153,22 @@ train = data.loc[m+1:] ...@@ -148,19 +153,22 @@ train = data.loc[m+1:]
print("训练集大小") print("训练集大小")
print(train.shape) print(train.shape)
train.to_csv("/home/zhangyanzhao/train.csv",index = False,header = None) train.to_csv("/home/zhangyanzhao/train.csv",index = False,header = None)
'''
print("start training") print("start training")
ffm_model = xl.create_ffm() ffm_model = xl.create_ffm()
ffm_model.setTrain("/home/zhangyanzhao/train.csv") ffm_model.setTrain("/home/zhangyanzhao/data.csv")
ffm_model.setValidate("/home/zhangyanzhao/validation.csv") # ffm_model.setValidate("/home/zhangyanzhao/validation.csv")
param = {'task':'binary', 'lr':0.2, param = {'task':'binary', 'lr':0.05,
'lambda':0.002, 'metric':'auc'} 'lambda':0.002, 'metric':'auc'}
ffm_model.fit(param, '/home/zhangyanzhao/model.out') ffm_model.fit(param, '/home/zhangyanzhao/model.out')
'''
ffm_model.setTest("/home/zhangyanzhao/test.csv") ffm_model.setTest("/home/zhangyanzhao/test.csv")
ffm_model.setSigmoid() ffm_model.setSigmoid()
ffm_model.predict("/home/zhangyanzhao/model.out", "/home/zhangyanzhao/output.txt") ffm_model.predict("/home/zhangyanzhao/model.out", "/home/zhangyanzhao/output.txt")
'''
print("end") print("end")
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment