Commit a1eb1908 authored by 郭羽

美购精排模型

parent 5c95959b
......@@ -280,9 +280,11 @@ def featureToRedis(key,datas):
def collectFeaturesToDict(samples, columns, prefix):
    """Collect the most recent feature row for each id as a JSON string.

    The id column name is built as ``prefix + "id"``. For every id, only the
    rows whose ``timestamp`` equals that id's maximum ``timestamp`` are kept.

    Args:
        samples: Table-like object holding the feature rows; it must expose
            a ``timestamp`` column and the ``prefix + "id"`` column.
            NOTE(review): presumably a PySpark DataFrame -- confirm with caller.
        columns: Names of the columns to keep in the output. Must include
            the ``prefix + "id"`` column, because it is used as the dict key.
        prefix: Prefix that identifies the entity, used to derive the id
            column name and printed alongside the row count.

    Returns:
        dict mapping each id value to the JSON dump of its selected row
        (``ensure_ascii=False`` so non-ASCII text is kept verbatim). If
        several distinct rows share an id, the last one collected wins.
    """
    idCol = prefix + "id"
    timestampCol = idCol + "_timestamp"

    # Latest timestamp per id; aliased to a separate name so it does not
    # clash with the original "timestamp" column after the join.
    latestPerId = samples.groupBy(idCol).agg(F.max("timestamp").alias(timestampCol))

    # Attach the per-id maximum to every row, then keep only the rows that
    # carry that maximum timestamp.
    latestRows = samples.join(latestPerId, on=[idCol], how='left').where(
        F.col("timestamp") == F.col(timestampCol)
    )

    # Single collect: the superseded pre-commit query (which collected a
    # result that was immediately overwritten) has been removed.
    resDatas = latestRows.select(*columns).distinct().collect()
    print(prefix, len(resDatas))
    return {d[idCol]: json.dumps(d.asDict(), ensure_ascii=False) for d in resDatas}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment