Commit abc9ec79 authored by 宋柯's avatar 宋柯

模型调试

parent 73a98c5e
...@@ -45,10 +45,10 @@ DATA_PATH_TRAIN = "/data/files/service_feature_{}_train.csv".format(VERSION) ...@@ -45,10 +45,10 @@ DATA_PATH_TRAIN = "/data/files/service_feature_{}_train.csv".format(VERSION)
def getRedisConn():
    """Build and return a Redis client for the test environment (db 0).

    NOTE(review): credentials and hosts are hard-coded below; consider
    moving them to configuration.
    """
    # Production connection (disabled by this commit, kept for reference):
    # pool = redis.ConnectionPool(host="172.16.50.145", password="XfkMCCdWDIU%ls$h", port=6379, db=0)
    # conn = redis.Redis(connection_pool=pool)
    # conn = redis.Redis(host="172.16.50.145", port=6379, password="XfkMCCdWDIU%ls$h", db=0)
    conn = redis.Redis(host="172.18.51.10", port=6379, db=0)  # test
    return conn
def parseTags(tags,i): def parseTags(tags,i):
...@@ -433,6 +433,12 @@ def dataVocabToRedis(dataVocab): ...@@ -433,6 +433,12 @@ def dataVocabToRedis(dataVocab):
conn.set(FEATURE_VOCAB_KEY,dataVocab) conn.set(FEATURE_VOCAB_KEY,dataVocab)
conn.expire(FEATURE_VOCAB_KEY,60 * 60 * 24 * 7) conn.expire(FEATURE_VOCAB_KEY,60 * 60 * 24 * 7)
def saveVocab(key, vocab):
    """Persist a vocabulary to Redis as a list under *key*, with a 24h TTL.

    Parameters
    ----------
    key : str
        Redis list key to write (e.g. "strategy:<field>:vocab").
    vocab : list
        Vocabulary terms; each term is pushed as its own list element.
    """
    conn = getRedisConn()
    # Unpack the terms so each one becomes a separate Redis list element.
    # The previous code passed the Python list itself as a single value,
    # which redis-py rejects (DataError: invalid input of type list).
    if vocab:
        conn.lpush(key, *vocab)
    # Bug fix: expire the key that was just written. The previous code
    # expired FEATURE_VOCAB_KEY instead, so vocab keys never expired and
    # the shared vocab key's TTL was silently shortened.
    conn.expire(key, 60 * 60 * 24)
def featureColumnsToRedis(columns): def featureColumnsToRedis(columns):
conn = getRedisConn() conn = getRedisConn()
conn.set(FEATURE_COLUMN_KEY, json.dumps(columns)) conn.set(FEATURE_COLUMN_KEY, json.dumps(columns))
...@@ -1016,10 +1022,14 @@ if __name__ == '__main__': ...@@ -1016,10 +1022,14 @@ if __name__ == '__main__':
write_time_start = time.time() write_time_start = time.time()
for categoty_field in categoty_fields: for categoty_field in categoty_fields:
output_file = "file:///home/gmuser/" + categoty_field + "_vocab" output_file = "file:///home/gmuser/" + categoty_field + "_vocab"
train_samples.select(categoty_field).where(F.col(categoty_field) != '-1').where(F.col(categoty_field) != '').distinct().write.mode("overwrite").options(header="false").csv(output_file) # train_samples.select(categoty_field).where(F.col(categoty_field) != '-1').where(F.col(categoty_field) != '').distinct().write.mode("overwrite").options(header="false").csv(output_file)
categoty_field_rows = train_samples.select(categoty_field).where(F.col(categoty_field) != '-1').where(F.col(categoty_field) != '').distinct().collect()
saveVocab("strategy:" + categoty_field + ":vocab", list(map(lambda row: row[categoty_field], categoty_field_rows)))
for multi_categoty_field in multi_categoty_fields: for multi_categoty_field in multi_categoty_fields:
output_file = "file:///home/gmuser/" + multi_categoty_field + "_vocab" output_file = "file:///home/gmuser/" + multi_categoty_field + "_vocab"
train_samples.selectExpr("explode(split({multi_categoty_field},','))".format(multi_categoty_field = multi_categoty_field)).where(F.col(multi_categoty_field) != '-1').distinct().write.mode("overwrite").options(header="false").csv(output_file) # train_samples.selectExpr("explode(split({multi_categoty_field},','))".format(multi_categoty_field = multi_categoty_field)).where(F.col(multi_categoty_field) != '-1').distinct().write.mode("overwrite").options(header="false").csv(output_file)
multi_categoty_field_rows = train_samples.selectExpr("explode(split({multi_categoty_field},',')) as {multi_categoty_field}".format(multi_categoty_field = multi_categoty_field)).where(F.col(multi_categoty_field) != '-1').distinct().collect()
saveVocab("strategy:" + multi_categoty_field + ":vocab", list(map(lambda row: row[multi_categoty_field], multi_categoty_field_rows)))
output_file = "file:///home/gmuser/train_samples" output_file = "file:///home/gmuser/train_samples"
train_samples.write.mode("overwrite").options(header="false", sep='|').csv(output_file) train_samples.write.mode("overwrite").options(header="false", sep='|').csv(output_file)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment