Commit 9659db45 authored by 郭羽

美购精排模型

parent 6d81b5f7
...@@ -534,31 +534,31 @@ def addDays(n, format="%Y%m%d"): ...@@ -534,31 +534,31 @@ def addDays(n, format="%Y%m%d"):
if __name__ == '__main__': if __name__ == '__main__':
# start = time.time() start = time.time()
# #入参 #入参
# trainDays = int(sys.argv[1]) trainDays = int(sys.argv[1])
# print('trainDays:{}'.format(trainDays),flush=True) print('trainDays:{}'.format(trainDays),flush=True)
# spark = get_spark("service_feature_csv_export") spark = get_spark("service_feature_csv_export")
# spark.sparkContext.setLogLevel("ERROR")
#
# endDay = addDays(-1)
# startDay = addDays(-(1 + int(trainDays)))
#
# print(startDay,endDay)
#
# itemDF = get_service_feature_df(spark)
# # 行为数据
# clickDF = getClickData(spark,startDay,endDay)
# exposureDF = getExposureData(spark,startDay,endDay)
# ratingDF = samplesNegAndUnion(clickDF,exposureDF)
conf = SparkConf().setAppName('featureEngineering').setMaster('local')
spark = SparkSession.builder.config(conf=conf).getOrCreate()
spark.sparkContext.setLogLevel("ERROR") spark.sparkContext.setLogLevel("ERROR")
itemDF = spark.read.format('csv').option('header', 'true').option('sep', '|').load("service_item.csv")
ratingDF = spark.read.format('csv').option('header', 'true').option('sep', '|').load("service_rating.csv")
ratingDF = ratingDF.withColumn("rating",F.when(col("label")>=1,1).otherwise(0)) endDay = addDays(-1)
startDay = addDays(-(1 + int(trainDays)))
print(startDay,endDay)
itemDF = get_service_feature_df(spark)
# 行为数据
clickDF = getClickData(spark,startDay,endDay)
exposureDF = getExposureData(spark,startDay,endDay)
ratingDF = samplesNegAndUnion(clickDF,exposureDF)
# conf = SparkConf().setAppName('featureEngineering').setMaster('local')
# spark = SparkSession.builder.config(conf=conf).getOrCreate()
# spark.sparkContext.setLogLevel("ERROR")
# itemDF = spark.read.format('csv').option('header', 'true').option('sep', '|').load("service_item.csv")
# ratingDF = spark.read.format('csv').option('header', 'true').option('sep', '|').load("service_rating.csv")
# ratingDF = ratingDF.withColumn("rating",F.when(col("label")>=1,1).otherwise(0))
ratingDF = ratingDF.withColumnRenamed("time_stamp", "timestamp")\ ratingDF = ratingDF.withColumnRenamed("time_stamp", "timestamp")\
.withColumnRenamed("device_id", "userid")\ .withColumnRenamed("device_id", "userid")\
.withColumnRenamed("card_id", "itemid")\ .withColumnRenamed("card_id", "itemid")\
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment