Commit 8b0682b3 authored by 张彦钊

fix bugs

parent 6b6f8d78
......@@ -32,8 +32,8 @@ exposure = exposure.rename(columns={0:"cid",1:"device_id",2:"time"})
print("成功获取曝光表里的数据")
# Compute the set difference between the exposure table and the click table
# (rows of `exposure` that do not also appear in `click`).
# NOTE(review): this listing looks like a diff whose +/- markers were stripped,
# so the next two lines are presumably the pre-fix version. DataFrame.append
# returns a new frame instead of modifying in place, so their results are
# discarded and `exposure` is left unchanged by them.
exposure.append(click)
exposure.append(click)
# Presumably the fixed version: the result is assigned back. `click` is
# appended twice so that every click row occurs at least twice in the combined
# frame, which makes the keep=False de-duplication below remove all of them.
# NOTE(review): DataFrame.append was removed in pandas 2.0; pd.concat is the
# replacement if this script is ever run on a newer pandas.
exposure = exposure.append(click)
exposure = exposure.append(click)
# Rows are compared only on the columns that `click` has.
subset = click.columns.tolist()
# keep=False drops every member of a duplicate group, not just the extras.
# NOTE(review): rows repeated within the exposure data itself are dropped too.
exposure = exposure.drop_duplicates(subset=subset,keep=False)
print("成功完成曝光表和点击表的差集合")
......@@ -66,6 +66,8 @@ print(data.head(2))
# Reorder the rows before the FFM conversion.
# NOTE(review): `shuffle` is imported outside this view (presumably
# sklearn.utils.shuffle) — confirm.
data = shuffle(data)
print("start ffm transform")
# FFM format conversion functions and class
class FFMFormatPandas:
def __init__(self):
......@@ -123,6 +125,7 @@ class FFMFormatPandas:
t = df.dtypes.to_dict()
return pd.Series({idx: self.transform_row_(row, t) for idx, row in df.iterrows()})
# Instantiate the FFM encoder and replace `data` with its converted form.
# NOTE(review): y='y' presumably names the label column; the body of
# fit_transform is not visible here — confirm.
ffm_train = FFMFormatPandas()
data = ffm_train.fit_transform(data, y='y')
print("done transform ffm")
......@@ -131,6 +134,8 @@ data = pd.read_csv("/home/zhangyanzhao/data.csv",header=None)
# Print the shape and the first two rows of the loaded data.
print("数据集大小")
print(data.shape)
print(data.head(2))
# The dataset-splitting code below is enclosed in a triple-quoted string, so it
# is evaluated as an unused string literal and none of it runs.
'''
n = np.rint(data.shape[0]/8)
m = np.rint(data.shape[0]*(3/8))
# 1/8的数据集用来做测试集
......@@ -148,19 +153,22 @@ train = data.loc[m+1:]
print("训练集大小")
print(train.shape)
train.to_csv("/home/zhangyanzhao/train.csv",index = False,header = None)
'''
print("start training")
# Create the FFM model object (`xl` is presumably the xlearn package, imported
# outside this view — confirm).
ffm_model = xl.create_ffm()
# NOTE(review): setTrain is called twice below with different files. In a diff
# with its +/- markers stripped, the first setTrain/setValidate pair is
# presumably the removed version and the data.csv call the one that is meant
# to take effect — confirm against the real file.
ffm_model.setTrain("/home/zhangyanzhao/train.csv")
ffm_model.setValidate("/home/zhangyanzhao/validation.csv")
ffm_model.setTrain("/home/zhangyanzhao/data.csv")
# ffm_model.setValidate("/home/zhangyanzhao/validation.csv")
param = {'task':'binary', 'lr':0.2,
param = {'task':'binary', 'lr':0.05,
'lambda':0.002, 'metric':'auc'}
ffm_model.fit(param, '/home/zhangyanzhao/model.out')
# The prediction step below is enclosed in a triple-quoted string, so it is an
# unused string literal and does not execute.
'''
ffm_model.setTest("/home/zhangyanzhao/test.csv")
ffm_model.setSigmoid()
ffm_model.predict("/home/zhangyanzhao/model.out", "/home/zhangyanzhao/output.txt")
'''
print("end")
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment