Commit 36e8047b authored by 郭羽's avatar 郭羽

service model optimization

parent be8de455
...@@ -164,18 +164,33 @@ def test(df_train): ...@@ -164,18 +164,33 @@ def test(df_train):
FEATURE_COLUMN_KEY = "Strategy:rec:column:service:" + VERSION FEATURE_COLUMN_KEY = "Strategy:rec:column:service:" + VERSION
conn = connUtils.getRedisConn4() conn = connUtils.getRedisConn4()
s = conn.get(FEATURE_COLUMN_KEY) s = conn.get(FEATURE_COLUMN_KEY)
# s = conn.get(FEATURE_USER_KEY+"869982038583034") user = conn.get(FEATURE_USER_KEY+"869982038583034")
item = conn.get(FEATURE_ITEM_KEY+"5884526")
datasColumnss = json.loads(s,encoding='utf-8') datasColumnss = json.loads(s,encoding='utf-8')
user_d = json.loads(user,encoding='utf-8')
item_d = json.loads(item,encoding='utf-8')
ddd = {}
for d in datasColumnss:
res = ["-1"]
if d in user_d:
res = [user_d[d]]
if d in item_d:
res = [item_d[d]]
ddd[d] = res
# #
# conn.get() # conn.get()
ddd = {} # ddd = {}
datasColumnss = df_train.columns.to_list() # datasColumnss = df_train.columns.to_list()
dd = df_train.sample(n=10) # dd = df_train.sample(n=10)
for c in datasColumnss: # for c in datasColumnss:
vvv = dd[c].tolist() # vvv = dd[c].tolist()
ddd[c] = vvv # ddd[c] = vvv
pre_data = {"inputs":ddd} pre_data = {"inputs":ddd}
pre_data = json.dumps(pre_data) pre_data = json.dumps(pre_data)
...@@ -187,49 +202,49 @@ def test(df_train): ...@@ -187,49 +202,49 @@ def test(df_train):
if __name__ == '__main__':
    # This revision disables the whole offline training pipeline that used to
    # run here (CSV load, timestamp-based train/test split, training-column
    # selection via getTrainColumns, dtype conversion via csvTypeConvert) and
    # runs only the Redis-backed serving smoke test. The dead commented-out
    # pipeline has been removed rather than kept as comment noise; recover it
    # from version control if training needs to be re-enabled.
    # NOTE(review): test(None) means any df_train-dependent path inside
    # test() is never exercised here — confirm that is intended.
    test(None)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment