Commit c295ba63 authored by 郭羽

service model 优化

parent 1affd87e
......@@ -260,14 +260,11 @@ def splitAndSaveTrainingTestSamplesByTimeStamp(samples,splitTimestamp, file_path
test.write.option("header", "true").option("delimiter", "|").mode('overwrite').csv(testSavePath)
def collectColumnToVocab(samples, column):
    """Build the vocabulary of distinct values found in one column.

    Args:
        samples: Object exposing ``select(column).distinct().collect()``
            whose collected rows can be indexed by column name
            (presumably a PySpark DataFrame -- confirm against callers).
        column: Name of the column whose values form the vocabulary.

    Returns:
        A list holding the ``str`` form of every distinct truthy value in
        the column, plus the placeholder ``"-1"``. The list is built from a
        set, so its order is unspecified.
    """
    rows = samples.select(column).distinct().collect()
    # Falsy cells (None, "", 0, ...) are skipped, exactly as before; they are
    # represented by the "-1" placeholder added below.
    # NOTE(review): a legitimate value of 0 is dropped too -- confirm intended.
    vocab = {str(row[column]) for row in rows if row[column]}
    vocab.add("-1")  # default entry for empty values
    return list(vocab)
def collectMutiColumnToVocab(samples,column):
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment