Commit 3d6af2ed authored by 郭羽

美购精排模型

parent 54ab78e5
...@@ -282,7 +282,7 @@ def collectFeaturesToDict(samples,columns,prefix): ...@@ -282,7 +282,7 @@ def collectFeaturesToDict(samples,columns,prefix):
idCol = prefix+"id" idCol = prefix+"id"
#根据timestamp获取每个user最新的记录 #根据timestamp获取每个user最新的记录
prefixSamples = samples.groupBy(idCol).agg(F.max("timestamp").alias("timestamp")) prefixSamples = samples.groupBy(idCol).agg(F.max("timestamp").alias("timestamp"))
resDatas = prefixSamples.join(samples, on=[idCol,"timestamp"], how='left').select(*columns).distinct().collect() resDatas = prefixSamples.join(samples, on=[idCol,"timestamp"], how='inner').select(*columns).distinct().collect()
return {d[idCol]:json.dumps(d.asDict(),ensure_ascii=False) for d in resDatas} return {d[idCol]:json.dumps(d.asDict(),ensure_ascii=False) for d in resDatas}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment