Commit 7c8d6595 authored by 郭羽

update feature

parent 66f2ea84
......@@ -819,7 +819,7 @@ if __name__ == '__main__':
"""训练数据保存 ======================================"""
timestmp3 = int(round(time.time()))
train_columns = model_columns + ["label", "timestamp", "page_stay"]
train_columns = model_columns + ["label", "timestamp", "rating"]
trainSamples = samplesWithUserFeatures.select(*train_columns)
print("write to hdfs start...")
splitTimestamp = int(time.mktime(time.strptime(addDays(0), "%Y%m%d")))
......
......@@ -92,8 +92,8 @@ def getWeight(x):
def getDataSet(df, shuffleSize=10000, batchSize=128):
    """Turn a tabular frame into a shuffled, batched ``tf.data.Dataset``.

    The 'label' column is popped off ``df`` and used as the target; every
    column still left in ``df`` afterwards becomes an entry of the feature
    dict fed to the dataset.

    Args:
        df: Object supporting ``pop('label')`` and ``dict(df)`` -- presumably
            a pandas DataFrame (TODO confirm against callers). If it is, the
            ``pop`` removes the 'label' column from the caller's frame too.
        shuffleSize: Buffer size handed to ``Dataset.shuffle``.
        batchSize: Batch size handed to ``Dataset.batch``.

    Returns:
        The dataset built from ``(features, labels)`` slices, shuffled and
        then batched.
    """
    # NOTE: per-sample weighting is currently disabled. The intended form was
    # to map the 'rating' column through getWeight and pop it as the weights;
    # while that stays off, a 'rating' column present in df remains part of
    # the features.
    targets = df.pop('label')
    features = dict(df)

    sliced = tf.data.Dataset.from_tensor_slices((features, targets))
    shuffled = sliced.shuffle(shuffleSize)
    return shuffled.batch(batchSize)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment