Commit cf011fbf authored by 张彦钊's avatar 张彦钊

修改ffm多进程方法

parent 42ab11d1
...@@ -89,7 +89,7 @@ class multiFFMFormatPandas: ...@@ -89,7 +89,7 @@ class multiFFMFormatPandas:
return self return self
def fit_transform(self, df, y=None,n=10000,processes=5): def fit_transform(self, df, y=None,n=1000000,processes=6):
# n是每个线程运行最大的数据条数,processes是线程数 # n是每个线程运行最大的数据条数,processes是线程数
self.fit(df, y) self.fit(df, y)
n = n n = n
...@@ -112,7 +112,7 @@ class multiFFMFormatPandas: ...@@ -112,7 +112,7 @@ class multiFFMFormatPandas:
ffm.append('{}:{}:{}'.format(self.field_index_[col], self.feature_index_[col], val)) ffm.append('{}:{}:{}'.format(self.field_index_[col], self.feature_index_[col], val))
return ' '.join(ffm) return ' '.join(ffm)
def transform(self, df,n=10000,processes=1): def transform(self, df,n=10000,processes=2):
# n是每个线程运行最大的数据条数,processes是线程数 # n是每个线程运行最大的数据条数,processes是线程数
t = df.dtypes.to_dict() t = df.dtypes.to_dict()
data_list = self.data_split_line(df,n) data_list = self.data_split_line(df,n)
...@@ -120,11 +120,12 @@ class multiFFMFormatPandas: ...@@ -120,11 +120,12 @@ class multiFFMFormatPandas:
# 设置进程的数量 # 设置进程的数量
pool = Pool(processes) pool = Pool(processes)
print("总进度: " + str(len(data_list))) print("总进度: " + str(len(data_list)))
result_map = {}
for i in range(len(data_list)): for i in range(len(data_list)):
data_list[i] = pool.apply_async(self.pool_function, (data_list[i], t,)) data_list[i] = pool.apply_async(self.pool_function, (data_list[i], t,))
result_map.update(data_list[i].get())
result_map = {}
for i in data_list:
result_map.update(i.get())
pool.close() pool.close()
pool.join() pool.join()
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment