Commit b02b5b23 authored by 高雅喆

Merge branch 'master' of git.wanmeizhensuo.com:ML/ffm-baseline

add a clickZeroUidRateDetail.py for the distribution of rate
parents 972f6248 0c65a30c
from eda.ml_tools.rocCurve import get_roc_curve
import pandas as pd
from config import *
......
DIRECTORY_PATH = '/data2/models/'
VALIDATION_DATE = '2018-08-05'
TEST_DATE = '2018-08-06'
......
import os
import time
from config import *
......
import pymysql
import pandas as pd
from utils import *
......
import xlearn as xl
from config import *
......
from config import *
import pandas as pd
import pickle
......@@ -40,6 +42,7 @@ def transform_ffm_format(df, device_id):
print("ffm格式转化结束")
predict_file_name = DIRECTORY_PATH + "result/{0}_{1}DiaryTop3000.csv".format(device_id, now)
data.to_csv(predict_file_name, index=False,header=None)
print("ffm写到服务器")
return predict_file_name
......@@ -86,7 +89,7 @@ def predict_save_to_redis(user_profile, instance):
def router(device_id):
user_profile, not_exist = fetch_user_profile(device_id)
if not_exist:
if not_exist==1:
print('Sorry, we don\'t have you.')
else:
predict(user_profile)
......@@ -97,7 +100,7 @@ if __name__ == "__main__":
while True:
start = time.time()
empty,device_id_list = get_active_users()
if empty:
if empty==1:
time.sleep(60)
else:
old_device_id_list = pd.read_csv(DIRECTORY_PATH + "data_set_device_id.csv")["device_id"].values.tolist()
......
from utils import con_sql
import datetime
......
import time
from prepareData import fetch_data
from utils import FFMFormatPandas
......
from processData import *
from diaryTraining import *
from diaryCandidateSet import get_eachCityDiaryTop3000
......
from utils import con_sql
from datetime import datetime
......@@ -12,13 +14,15 @@ def get_active_users():
device_id_df = con_sql(sql)
if device_id_df.empty:
print("当下这一分钟没有活跃用户,不需要预测")
return True,None
# 为了debug supervisor,修改了下面的return参数
return 1,[1,2]
else:
device_id_list = device_id_df[0].values.tolist()
# 对device_id 进行去重
device_id_list = list(set(device_id_list))
print("成功获取当下一分钟内活跃用户")
return False,device_id_list
# 为了debug supervisor,修改了下面的return参数
return "0",device_id_list
def fetch_user_profile(device_id):
......@@ -26,11 +30,13 @@ def fetch_user_profile(device_id):
user_profile = con_sql(sql)
if user_profile.empty:
print("没有获取到该用户对应的city_id")
return {}, user_profile.empty
# 为了debug supervisor,修改了下面的return参数
return {1:2}, 1
else:
user_profile = user_profile.rename(columns={0:"device_id",1:"city_id"})
print("成功获取该用户对应的city_id")
user_profile_dict = {}
for i in user_profile.columns:
user_profile_dict[i] = user_profile.loc[0, i]
return user_profile_dict, user_profile.empty
# 为了debug supervisor,修改了下面的return参数
return user_profile_dict, "0"
# encoding = "utf-8"
import pymysql
import pandas as pd
......@@ -12,6 +14,7 @@ def con_sql(sql):
cursor = db.cursor()
cursor.execute(sql)
result = cursor.fetchall()
print("成功从数据库获取数据")
df = pd.DataFrame(list(result)).dropna()
db.close()
return df
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment