Commit a70efdce authored by 郭羽

update feature

parent 2a892ff0
...@@ -72,11 +72,29 @@ def loadData(data_path): ...@@ -72,11 +72,29 @@ def loadData(data_path):
print("读取数据耗时ms:{}".format(timestmp2 - timestmp1)) print("读取数据耗时ms:{}".format(timestmp2 - timestmp1))
return df return df
def getWeight(x):
    """Convert a raw value into an integer weight between 1 and 4.

    The value is coerced with ``int``; the resulting integer is bucketed:
    values in 1..5 give 2, values in 6..10 give 3, values above 10 give 4,
    and everything else (zero or negative) gives 1.

    If the coercion fails, the exception is printed and the default
    weight 1 is returned.
    """
    try:
        value = int(x)
    except Exception as e:
        # Best-effort: an unparsable value falls back to the default weight.
        print(e)
        return 1

    # Check the buckets from the highest threshold down.
    if value > 10:
        return 4
    if value > 5:
        return 3
    if value > 0:
        return 2
    return 1
def getDataSet(df,shuffleSize = 10000,batchSize=128):
    """Build a shuffled, batched dataset of (features, label, weight) tuples.

    Args:
        df: table holding a 'label' column plus the feature columns
            (presumably a pandas DataFrame -- confirm against loadData).
            It is modified in place: the 'label' column is removed, and any
            existing 'page_stay' column is overwritten and removed.
        shuffleSize: buffer size passed to ``Dataset.shuffle``.
        batchSize: batch size passed to ``Dataset.batch``.

    Returns:
        A ``tf.data.Dataset`` built from ``(dict(df), labels, weights)``,
        shuffled and then batched.
    """
    labels = df.pop('label')
    # `labels` is already the popped column, so map over it directly;
    # indexing it with "label" again would be a lookup by index label and fail.
    # Every row currently gets the constant weight 1.
    df["page_stay"] = labels.map(lambda x: 1)
    weights = df.pop('page_stay')
    dataSet = (
        tf.data.Dataset.from_tensor_slices((dict(df), labels, weights))
        .shuffle(shuffleSize)
        .batch(batchSize)
    )
    return dataSet
def getTrainColumns(train_columns,data_vocab): def getTrainColumns(train_columns,data_vocab):
...@@ -182,8 +200,9 @@ if __name__ == '__main__': ...@@ -182,8 +200,9 @@ if __name__ == '__main__':
splitTimestamp = int(time.mktime(time.strptime(addDays(-1), "%Y%m%d"))) splitTimestamp = int(time.mktime(time.strptime(addDays(-1), "%Y%m%d")))
# redis中加载数据字典 # redis中加载数据字典
print("redis 中加载模型字典...") print("加载模型字典...")
data_vocab = json.load(open(configUtils.VOCAB_PATH,mode='r',encoding='utf-8')) data_vocab = json.load(open(configUtils.VOCAB_PATH,mode='r',encoding='utf-8'))
print("字典keys:",str(data_vocab.keys()))
# data_vocab = getDataVocabFromRedis(VERSION) # data_vocab = getDataVocabFromRedis(VERSION)
assert data_vocab assert data_vocab
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment