Commit 7c8d6595 authored by 郭羽

update feature

parent 66f2ea84
@@ -819,7 +819,7 @@ if __name__ == '__main__':
 """训练数据保存 ======================================"""
 timestmp3 = int(round(time.time()))
-train_columns = model_columns + ["label", "timestamp", "page_stay"]
+train_columns = model_columns + ["label", "timestamp", "rating"]
 trainSamples = samplesWithUserFeatures.select(*train_columns)
 print("write to hdfs start...")
 splitTimestamp = int(time.mktime(time.strptime(addDays(0), "%Y%m%d")))
......
@@ -92,8 +92,8 @@ def getWeight(x):
 def getDataSet(df,shuffleSize = 10000,batchSize=128):
 # print(df.dtypes)
 labels = df.pop('label')
-# df["page_stay"] = df["page_stay"].map(getWeight)
-# weights = df.pop('page_stay')
+# df["rating"] = df["rating"].map(getWeight)
+# weights = df.pop('rating')
 dataSet = tf.data.Dataset.from_tensor_slices((dict(df), labels)).shuffle(shuffleSize).batch(batchSize)
 return dataSet
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment