Commit 1e28e399 authored by 张彦钊's avatar 张彦钊

add test file

parent 6bffb6ec
import json import json
import numpy as np
from pyspark import SparkContext from pyspark import SparkContext
from pyspark.sql import SQLContext from pyspark.sql import SQLContext
import pandas as pd import pandas as pd
from pyspark import SparkConf from pyspark import SparkConf
from multiprocessing import Pool from multiprocessing import Pool
import pymysql
class multiFFMFormatPandas: class multiFFMFormatPandas:
def __init__(self): def __init__(self):
self.field_index_ = None self.field_index_ = None
...@@ -24,17 +26,6 @@ class multiFFMFormatPandas: ...@@ -24,17 +26,6 @@ class multiFFMFormatPandas:
self.feature_index_ = dict() self.feature_index_ = dict()
last_idx = 0 last_idx = 0
for col in df.columns:
vals = df[col].unique()
for val in vals:
if pd.isnull(val):
if self.feature_index_ is not None:
last_idx = max(list(self.feature_index_.values()))
if self.feature_index_ is None:
self.feature_index_ = dict()
last_idx = 0
for col in df.columns: for col in df.columns:
vals = df[col].unique() vals = df[col].unique()
for val in vals: for val in vals:
...@@ -121,6 +112,17 @@ class multiFFMFormatPandas: ...@@ -121,6 +112,17 @@ class multiFFMFormatPandas:
data_list.append(data.iloc[x:data.__len__()]) data_list.append(data.iloc[x:data.__len__()])
break break
'''
# 返回生成器方法,但是本地测试效率不高
x = 0
while True:
if x + step < data.__len__():
yield data.iloc[x:x + step]
x = x + step + 1
else:
yield data.iloc[x:data.__len__()]
break
'''
return data_list return data_list
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment