Commit d09fd710 authored by 郭羽

美购精排模型

parent 04316d6f
......@@ -2,7 +2,10 @@ import tensorflow as tf
import json
import pandas as pd
import time
import sys
import os
sys.path.append(os.path.dirname(os.path.abspath(os.path.dirname(__file__))))
import utils.connUtils as connUtils
ITEM_NUMBER_COLUMNS = ["smart_rank2"]
......@@ -37,7 +40,6 @@ def getDataVocabFromRedis(version):
# 数据类型转换
def csvTypeConvert(df,data_vocab):
df = df.fillna("-1")
# 离散na值填充
for k, v in data_vocab.items():
df[k] = df[k].fillna("-1")
......
......@@ -186,7 +186,7 @@ def splitAndSaveTrainingTestSamplesByTimeStamp(samples,splitTimestamp, file_path
# splitTimestamp = quantile[0]
train = samples.where(F.col("timestampLong") <= splitTimestamp).drop("timestampLong")
test = samples.where(F.col("timestampLong") > splitTimestamp).drop("timestampLong")
print("split train size:{},test size:".format(str(train.count()),str(test.count())))
print("split train size:{},test size:{}".format(str(train.count()),str(test.count())))
trainingSavePath = file_path + '_train'
testSavePath = file_path + '_test'
train.write.option("header", "true").option("delimiter", "|").mode('overwrite').csv(trainingSavePath)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment