Commit 6ced4f40 authored by 张彦钊

delete predict function param argument

parent dcc31112
......@@ -15,7 +15,6 @@ def train():
print("predicting")
ffm_model.setTest(DIRECTORY_PATH + "test_ffm_data.csv")
ffm_model.setSigmoid()
param_predict = {"log": "/data2/models/result"}
ffm_model.predict(param_predict,DIRECTORY_PATH + "model.out",DIRECTORY_PATH + "test_set_predict_output.txt")
ffm_model.predict(DIRECTORY_PATH + "model.out",DIRECTORY_PATH + "test_set_predict_output.txt")
......@@ -48,9 +48,7 @@ def predict(user_profile):
ffm_model.setTest(instance_file_path)
ffm_model.setSigmoid()
#日志保存路径,如果不加这个参数,日志默认保存在/temp路径下,不符合规范
param = {"log": "/data2/models/result"}
ffm_model.predict(param,DIRECTORY_PATH + "model.out",
ffm_model.predict(DIRECTORY_PATH + "model.out",
DIRECTORY_PATH + "result/{0}_output.txt".format(user_profile['device_id']))
print("该用户预测结束")
predict_save_to_local(user_profile, instance)
......
......@@ -94,7 +94,7 @@ class multiFFMFormatPandas:
self.fit(df, y)
n = n
processes = processes
return self.transform(df,n=n,processes=processes)
return self.transform(df,n,processes)
def transform_row_(self, row, t):
ffm = []
......@@ -112,17 +112,13 @@ class multiFFMFormatPandas:
ffm.append('{}:{}:{}'.format(self.field_index_[col], self.feature_index_[col], val))
return ' '.join(ffm)
# def transform(self, df):
# t = df.dtypes.to_dict()
# return pd.Series({idx: self.transform_row_(row, t) for idx, row in df.iterrows()})
def transform(self, df,n=10000,processes=1):
# n是每个线程运行最大的数据条数,processes是线程数
t = df.dtypes.to_dict()
data_list = self.data_split_line(df,n)
# 设置进程的数量
pool = Pool(processes=processes)
pool = Pool(processes)
for i in range(len(data_list)):
print("处理进度: "+str(i+1)+"/"+str(len(data_list)))
data_list[i] = pool.apply_async(self.pool_function, (data_list[i], t,))
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment