# -*- coding: utf-8 -*-
"""Load MySQL query results into pandas and encode DataFrames as libffm text."""
import numpy as np
import pandas as pd

try:
    import pymysql
except ImportError:
    # The FFM encoder below does not need a database driver; only con_sql
    # does, and it raises a clear error if the driver is missing.
    pymysql = None


def con_sql(sql):
    """Run *sql* against the configured MySQL server and return a DataFrame.

    Args:
        sql: The SQL statement to execute. It is sent verbatim, so callers
            must not build it from untrusted input.

    Returns:
        A ``pandas.DataFrame`` built from all fetched rows, with every row
        that contains a null value dropped. Column labels are whatever
        pandas assigns by default; they are not taken from the query.

    Raises:
        ImportError: If ``pymysql`` is not installed.
    """
    if pymysql is None:
        raise ImportError("pymysql is required to use con_sql")
    # NOTE(review): connection credentials are hard-coded in source; they
    # should be moved to configuration or environment variables.
    db = pymysql.connect(host='10.66.157.22', port=4000, user='root',
                         passwd='3SYz54LS9#^9sBvC', db='jerry_test')
    try:
        cursor = db.cursor()
        cursor.execute(sql)
        result = cursor.fetchall()
    finally:
        # Always release the connection, even when the query fails.
        db.close()
    return pd.DataFrame(list(result)).dropna()


class FFMFormatPandas:
    """Encode DataFrame rows as libffm lines: ``label field:feature:value ...``.

    Attributes set by ``fit``:
        field_index_: maps each non-label column name to its field id.
        feature_index_: maps ``'<column>_<value>'`` (one entry per distinct
            value) and each bare column name to a feature id.
        y: name of the label column, or ``None`` when there is no label.
    """

    def __init__(self):
        self.field_index_ = None
        self.feature_index_ = None
        self.y = None

    def fit(self, df, y=None):
        """Build (or extend) the field and feature indices from *df*.

        Calling ``fit`` again keeps every index that was already assigned
        and only appends ids for features not seen before.

        Args:
            df: DataFrame to index.
            y: Name of the label column, or ``None``.

        Returns:
            ``self``, so calls can be chained.
        """
        self.y = y
        # Index.difference returns the remaining column labels (sorted when
        # they are sortable), so field ids follow that order, not df's.
        df_ffm = df[df.columns.difference([self.y])]
        if self.field_index_ is None:
            self.field_index_ = {col: i for i, col in enumerate(df_ffm)}

        if self.feature_index_ is None:
            self.feature_index_ = {}
        # Continue numbering after the highest id already in use; starting
        # at the maximum itself would hand out a duplicate id on refit.
        next_idx = max(self.feature_index_.values(), default=-1) + 1

        # NOTE: the label column is indexed as well; this is kept so that
        # feature ids stay identical to those produced previously.
        for col in df.columns:
            for val in df[col].unique():
                if pd.isnull(val):
                    continue
                name = '{}_{}'.format(col, val)
                if name not in self.feature_index_:
                    self.feature_index_[name] = next_idx
                    next_idx += 1
            # Feature id used for integer columns; never renumber it once
            # assigned, otherwise earlier output becomes inconsistent.
            if col not in self.feature_index_:
                self.feature_index_[col] = next_idx
                next_idx += 1
        return self

    def fit_transform(self, df, y=None):
        """Fit the indices on *df* and return its libffm encoding."""
        self.fit(df, y)
        return self.transform(df)

    def transform_row_(self, row, t):
        """Encode a single row as one libffm line.

        Args:
            row: A ``pandas.Series`` holding one DataFrame row.
            t: Mapping of column name to dtype for the source DataFrame.

        Returns:
            The label (``'0'`` when no label column is configured) followed
            by one ``field:feature:value`` token per encoded column, joined
            by single spaces.

        Raises:
            KeyError: If an object column holds a non-null value that was
                not seen during ``fit``.
        """
        if self.y is not None:
            # Positional access on the filtered Series; plain ``[0]`` would
            # be treated as a label lookup on a label-indexed Series.
            ffm = [str(row.loc[row.index == self.y].iloc[0])]
        else:
            ffm = [str(0)]

        for col, val in row.loc[row.index != self.y].to_dict().items():
            kind = t[col].kind
            if kind == 'O':
                # fit() skips nulls, so they have no feature id; emit
                # nothing for them instead of failing the lookup.
                if pd.isnull(val):
                    continue
                name = '{}_{}'.format(col, val)
                ffm.append('{}:{}:1'.format(self.field_index_[col],
                                            self.feature_index_[name]))
            elif kind == 'i':
                ffm.append('{}:{}:{}'.format(self.field_index_[col],
                                             self.feature_index_[col], val))
            # NOTE(review): columns of any other dtype kind (e.g. float)
            # are omitted from the output - confirm this is intended.
        return ' '.join(ffm)

    def transform(self, df):
        """Encode every row of *df*.

        Returns:
            A ``pandas.Series`` of libffm lines aligned with ``df.index``.
            Rows that share an index label are all kept.
        """
        t = df.dtypes.to_dict()
        lines = [self.transform_row_(row, t) for _, row in df.iterrows()]
        return pd.Series(lines, index=df.index, dtype=object)