Commit d09fd710 authored by 郭羽

美购精排模型

parent 04316d6f
...@@ -2,7 +2,10 @@ import tensorflow as tf ...@@ -2,7 +2,10 @@ import tensorflow as tf
import json import json
import pandas as pd import pandas as pd
import time import time
import sys
import os
sys.path.append(os.path.dirname(os.path.abspath(os.path.dirname(__file__))))
import utils.connUtils as connUtils import utils.connUtils as connUtils
ITEM_NUMBER_COLUMNS = ["smart_rank2"] ITEM_NUMBER_COLUMNS = ["smart_rank2"]
...@@ -37,7 +40,6 @@ def getDataVocabFromRedis(version): ...@@ -37,7 +40,6 @@ def getDataVocabFromRedis(version):
# 数据类型转换 # 数据类型转换
def csvTypeConvert(df,data_vocab): def csvTypeConvert(df,data_vocab):
df = df.fillna("-1")
# 离散na值填充 # 离散na值填充
for k, v in data_vocab.items(): for k, v in data_vocab.items():
df[k] = df[k].fillna("-1") df[k] = df[k].fillna("-1")
......
...@@ -186,7 +186,7 @@ def splitAndSaveTrainingTestSamplesByTimeStamp(samples,splitTimestamp, file_path ...@@ -186,7 +186,7 @@ def splitAndSaveTrainingTestSamplesByTimeStamp(samples,splitTimestamp, file_path
# splitTimestamp = quantile[0] # splitTimestamp = quantile[0]
train = samples.where(F.col("timestampLong") <= splitTimestamp).drop("timestampLong") train = samples.where(F.col("timestampLong") <= splitTimestamp).drop("timestampLong")
test = samples.where(F.col("timestampLong") > splitTimestamp).drop("timestampLong") test = samples.where(F.col("timestampLong") > splitTimestamp).drop("timestampLong")
print("split train size:{},test size:".format(str(train.count()),str(test.count()))) print("split train size:{},test size:{}".format(str(train.count()),str(test.count())))
trainingSavePath = file_path + '_train' trainingSavePath = file_path + '_train'
testSavePath = file_path + '_test' testSavePath = file_path + '_test'
train.write.option("header", "true").option("delimiter", "|").mode('overwrite').csv(trainingSavePath) train.write.option("header", "true").option("delimiter", "|").mode('overwrite').csv(trainingSavePath)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment