Commit b02b5b23 authored by 高雅喆

Merge branch 'master' of git.wanmeizhensuo.com:ML/ffm-baseline

add a clickZeroUidRateDetail.py for the distribution of rate
parents 972f6248 0c65a30c
from eda.ml_tools.rocCurve import get_roc_curve from eda.ml_tools.rocCurve import get_roc_curve
import pandas as pd import pandas as pd
from config import * from config import *
......
DIRECTORY_PATH = '/data2/models/' DIRECTORY_PATH = '/data2/models/'
VALIDATION_DATE = '2018-08-05' VALIDATION_DATE = '2018-08-05'
TEST_DATE = '2018-08-06' TEST_DATE = '2018-08-06'
......
import os import os
import time import time
from config import * from config import *
......
import pymysql import pymysql
import pandas as pd import pandas as pd
from utils import * from utils import *
......
import xlearn as xl import xlearn as xl
from config import * from config import *
......
from config import * from config import *
import pandas as pd import pandas as pd
import pickle import pickle
...@@ -40,6 +42,7 @@ def transform_ffm_format(df, device_id): ...@@ -40,6 +42,7 @@ def transform_ffm_format(df, device_id):
print("ffm格式转化结束") print("ffm格式转化结束")
predict_file_name = DIRECTORY_PATH + "result/{0}_{1}DiaryTop3000.csv".format(device_id, now) predict_file_name = DIRECTORY_PATH + "result/{0}_{1}DiaryTop3000.csv".format(device_id, now)
data.to_csv(predict_file_name, index=False,header=None) data.to_csv(predict_file_name, index=False,header=None)
print("ffm写到服务器")
return predict_file_name return predict_file_name
...@@ -86,7 +89,7 @@ def predict_save_to_redis(user_profile, instance): ...@@ -86,7 +89,7 @@ def predict_save_to_redis(user_profile, instance):
def router(device_id): def router(device_id):
user_profile, not_exist = fetch_user_profile(device_id) user_profile, not_exist = fetch_user_profile(device_id)
if not_exist: if not_exist==1:
print('Sorry, we don\'t have you.') print('Sorry, we don\'t have you.')
else: else:
predict(user_profile) predict(user_profile)
...@@ -97,7 +100,7 @@ if __name__ == "__main__": ...@@ -97,7 +100,7 @@ if __name__ == "__main__":
while True: while True:
start = time.time() start = time.time()
empty,device_id_list = get_active_users() empty,device_id_list = get_active_users()
if empty: if empty==1:
time.sleep(60) time.sleep(60)
else: else:
old_device_id_list = pd.read_csv(DIRECTORY_PATH + "data_set_device_id.csv")["device_id"].values.tolist() old_device_id_list = pd.read_csv(DIRECTORY_PATH + "data_set_device_id.csv")["device_id"].values.tolist()
......
from utils import con_sql from utils import con_sql
import datetime import datetime
......
import time import time
from prepareData import fetch_data from prepareData import fetch_data
from utils import FFMFormatPandas from utils import FFMFormatPandas
......
from processData import * from processData import *
from diaryTraining import * from diaryTraining import *
from diaryCandidateSet import get_eachCityDiaryTop3000 from diaryCandidateSet import get_eachCityDiaryTop3000
......
from utils import con_sql from utils import con_sql
from datetime import datetime from datetime import datetime
...@@ -12,13 +14,15 @@ def get_active_users(): ...@@ -12,13 +14,15 @@ def get_active_users():
device_id_df = con_sql(sql) device_id_df = con_sql(sql)
if device_id_df.empty: if device_id_df.empty:
print("当下这一分钟没有活跃用户,不需要预测") print("当下这一分钟没有活跃用户,不需要预测")
return True,None # 为了debug supervisor,修改了下面的return参数
return 1,[1,2]
else: else:
device_id_list = device_id_df[0].values.tolist() device_id_list = device_id_df[0].values.tolist()
# 对device_id 进行去重 # 对device_id 进行去重
device_id_list = list(set(device_id_list)) device_id_list = list(set(device_id_list))
print("成功获取当下一分钟内活跃用户") print("成功获取当下一分钟内活跃用户")
return False,device_id_list # 为了debug supervisor,修改了下面的return参数
return "0",device_id_list
def fetch_user_profile(device_id): def fetch_user_profile(device_id):
...@@ -26,11 +30,13 @@ def fetch_user_profile(device_id): ...@@ -26,11 +30,13 @@ def fetch_user_profile(device_id):
user_profile = con_sql(sql) user_profile = con_sql(sql)
if user_profile.empty: if user_profile.empty:
print("没有获取到该用户对应的city_id") print("没有获取到该用户对应的city_id")
return {}, user_profile.empty # 为了debug supervisor,修改了下面的return参数
return {1:2}, 1
else: else:
user_profile = user_profile.rename(columns={0:"device_id",1:"city_id"}) user_profile = user_profile.rename(columns={0:"device_id",1:"city_id"})
print("成功获取该用户对应的city_id") print("成功获取该用户对应的city_id")
user_profile_dict = {} user_profile_dict = {}
for i in user_profile.columns: for i in user_profile.columns:
user_profile_dict[i] = user_profile.loc[0, i] user_profile_dict[i] = user_profile.loc[0, i]
return user_profile_dict, user_profile.empty # 为了debug supervisor,修改了下面的return参数
return user_profile_dict, "0"
# encoding = "utf-8" # encoding = "utf-8"
import pymysql import pymysql
import pandas as pd import pandas as pd
...@@ -12,6 +14,7 @@ def con_sql(sql): ...@@ -12,6 +14,7 @@ def con_sql(sql):
cursor = db.cursor() cursor = db.cursor()
cursor.execute(sql) cursor.execute(sql)
result = cursor.fetchall() result = cursor.fetchall()
print("成功从数据库获取数据")
df = pd.DataFrame(list(result)).dropna() df = pd.DataFrame(list(result)).dropna()
db.close() db.close()
return df return df
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment