Commit 03735f88 authored by 宋柯

模型调试

parent 7d5ccd4b
...@@ -291,10 +291,20 @@ def wilson_ctr(num_pv, num_click): ...@@ -291,10 +291,20 @@ def wilson_ctr(num_pv, num_click):
score = (p + z*z/(2*n) - z*math.sqrt((p*(1.0 - p) + z*z /(4.0*n))/n)) / (1.0 + z*z/n); score = (p + z*z/(2*n) - z*math.sqrt((p*(1.0 - p) + z*z /(4.0*n))/n)) / (1.0 + z*z/n);
return float(score); return float(score);
def addUserFeatures(samples,dataVocab,multiVocab): def getUserProfileFeature(samples_iEsF_iStatisticF, spark, startDay, endDay):
dataVocab["userid"] = collectColumnToVocab(samples, "userid") #连接doris_olap库
dataVocab["user_city_id"] = collectColumnToVocab(samples, "user_city_id") table_query = """
dataVocab["user_os"] = ["ios","android"] select date as dt, cl_id as device_id, first_solutions, second_solutions, first_demands, second_demands, first_positions, second_positions, projects
from user_tag3_portrait
where date >= {startDay} and date <= {endDay}
""".format(startDay, endDay)
userProfileFeatureDF = spark.read.jdbc('jdbc:mysql://172.16.30.136:3306/doris_olap', table_query, column = 'cl_id', numPartitions = 100,
properties = { 'user': 'doris_olap', 'password': 'bA27hXasdfswuolap', 'driver': 'com.mysql.jdbc.Driver' })
userProfileFeatureDF.show(100, False)
def addUserFeatures(samples):
extractTagsUdf = F.udf(extractTags, ArrayType(StringType())) extractTagsUdf = F.udf(extractTags, ArrayType(StringType()))
arrayReverseUdf = F.udf(arrayReverse, ArrayType(StringType())) arrayReverseUdf = F.udf(arrayReverse, ArrayType(StringType()))
...@@ -307,7 +317,6 @@ def addUserFeatures(samples,dataVocab,multiVocab): ...@@ -307,7 +317,6 @@ def addUserFeatures(samples,dataVocab,multiVocab):
for i in range(1,11): for i in range(1,11):
samples = samples.withColumn("userRatedHistory"+str(i), F.when(F.col("userPositiveHistory")[i-1].isNotNull(),F.col("userPositiveHistory")[i-1]).otherwise("-1")) samples = samples.withColumn("userRatedHistory"+str(i), F.when(F.col("userPositiveHistory")[i-1].isNotNull(),F.col("userPositiveHistory")[i-1]).otherwise("-1"))
dataVocab["userRatedHistory"+str(i)] = dataVocab["item_id"]
samples = samples.drop("userPositiveHistory") samples = samples.drop("userPositiveHistory")
# user偏好 # user偏好
...@@ -923,7 +932,8 @@ if __name__ == '__main__': ...@@ -923,7 +932,8 @@ if __name__ == '__main__':
spark = get_spark("SERVICE_FEATURE_CSV_EXPORT_SK") spark = get_spark("SERVICE_FEATURE_CSV_EXPORT_SK")
spark.sparkContext.setLogLevel("ERROR") spark.sparkContext.setLogLevel("ERROR")
getUserProfileFeature(None, spark, addDays(-trainDays - 1), addDays(-1))
sys.exit()
#获取点击曝光数据 #获取点击曝光数据
clickDF, expDF, ratingDF, startDay, endDay = get_click_exp_rating_df(trainDays, spark) clickDF, expDF, ratingDF, startDay, endDay = get_click_exp_rating_df(trainDays, spark)
...@@ -941,18 +951,13 @@ if __name__ == '__main__': ...@@ -941,18 +951,13 @@ if __name__ == '__main__':
.join(expStaticFeatures, on = ["item_id", "partition_date"], how = 'left')\ .join(expStaticFeatures, on = ["item_id", "partition_date"], how = 'left')\
.join(itemEsFeatureDF, on = ["item_id"], how = 'left') .join(itemEsFeatureDF, on = ["item_id"], how = 'left')
#item 统计 特征 Process
samples_iEsF_iStatisticF = itemStatisticFeaturesProcess(samples_iEsF_iStatisticF) samples_iEsF_iStatisticF = itemStatisticFeaturesProcess(samples_iEsF_iStatisticF)
sys.exit(1) #user profile feature
# 统计数据处理 samplesWithUserFeatures = getUserProfileFeature(samples_iEsF_iStatisticF, spark)
# ratingSamplesWithLabel = addStaticsFeatures(ratingSamplesWithLabel,dataVocab)
# samples = ratingDF.join(itemStaticDF, on=['item_id'], how='inner')
print("处理user特征...")
samplesWithUserFeatures = addUserFeatures(samples,dataVocab,multiVocab)
timestmp3 = int(round(time.time()))
print("处理user特征, 耗时s:{}".format(timestmp3 - timestmp2))
# #
# user columns # user columns
user_columns = [c for c in samplesWithUserFeatures.columns if c.startswith("user")] user_columns = [c for c in samplesWithUserFeatures.columns if c.startswith("user")]
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment