Commit 6b6f8d78 authored by 张彦钊

fix bugs

parent edc69b33
...@@ -128,16 +128,26 @@ data = ffm_train.fit_transform(data, y='y') ...@@ -128,16 +128,26 @@ data = ffm_train.fit_transform(data, y='y')
print("done transform ffm") print("done transform ffm")
data.to_csv("/home/zhangyanzhao/data.csv",index=False) data.to_csv("/home/zhangyanzhao/data.csv",index=False)
data = pd.read_csv("/home/zhangyanzhao/data.csv",header=None) data = pd.read_csv("/home/zhangyanzhao/data.csv",header=None)
print("数据集大小")
print(data.shape)
print(data.head(2))
n = np.rint(data.shape[0]/8) n = np.rint(data.shape[0]/8)
m = np.rint(data.shape[0]*(3/8)) m = np.rint(data.shape[0]*(3/8))
# 1/8的数据集用来做测试集 # 1/8的数据集用来做测试集
data.loc[:n].to_csv("/home/zhangyanzhao/test.csv",index = False,header = None) test = data.loc[:n]
print("测试集大小")
print(test.shape)
test.to_csv("/home/zhangyanzhao/test.csv",index = False,header = None)
# 1/4的数据集用来做验证集 # 1/4的数据集用来做验证集
data.loc[n+1:m].to_csv("/home/zhangyanzhao/validation.csv",index = False,header = None) validation = data.loc[n+1:m]
# 剩余的数据集用来做验证集 validation.to_csv("/home/zhangyanzhao/validation.csv",index = False,header = None)
data.loc[m+1:].to_csv("/home/zhangyanzhao/train.csv",index = False,header = None) print("验证集大小")
# 销毁data,目的是为了节省内存 print(validation.shape)
data = data.drop(data.index.tolist()) # 剩余的数据集用来做训练集
train = data.loc[m+1:]
print("训练集大小")
print(train.shape)
train.to_csv("/home/zhangyanzhao/train.csv",index = False,header = None)
print("start training") print("start training")
ffm_model = xl.create_ffm() ffm_model = xl.create_ffm()
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment