add test file

1e28e399 · 张彦钊 · 6bffb6ec · 1e28e399
Commit 1e28e399 authored Dec 06, 2018 by 张彦钊
Hide whitespace changes
Inline Side-by-side

Showing with 13 additions and 11 deletions

test.py tensnsorflow/test.py +13 -11

No files found.
--- a/tensnsorflow/test.py
+++ b/tensnsorflow/test.py
 import json
+import numpy as np
 from pyspark import SparkContext
 from pyspark.sql import SQLContext
 import pandas as pd
 from pyspark import  SparkConf
 from multiprocessing import Pool
+import pymysql
 class multiFFMFormatPandas:
    def __init__(self):
        self.field_index_ = None
@@ -24,17 +26,6 @@ class multiFFMFormatPandas:
            self.feature_index_ = dict()
            last_idx = 0
-        for col in df.columns:
-            vals = df[col].unique()
-            for val in vals:
-                if pd.isnull(val):
-        if self.feature_index_ is not None:
-            last_idx = max(list(self.feature_index_.values()))
-        if self.feature_index_ is None:
-            self.feature_index_ = dict()
-            last_idx = 0
        for col in df.columns:
            vals = df[col].unique()
            for val in vals:
@@ -121,6 +112,17 @@ class multiFFMFormatPandas:
                data_list.append(data.iloc[x:data.__len__()])
                break
+        '''
+        # 返回生成器方法，但是本地测试效率不高
+        x = 0
+        while True:
+            if x + step < data.__len__():
+                yield data.iloc[x:x + step]
+                x = x + step + 1
+            else:
+                yield data.iloc[x:data.__len__()]
+                break
+        '''
        return data_list