Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
b02b5b23
Commit
b02b5b23
authored
Aug 14, 2018
by
高雅喆
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'master' of git.wanmeizhensuo.com:ML/ffm-baseline
add a clickZeroUidRateDetail.py for the distribution of rate
parents
972f6248
0c65a30c
Hide whitespace changes
Inline
Side-by-side
Showing
11 changed files
with
33 additions
and
6 deletions
+33
-6
aucCaculate.py
aucCaculate.py
+2
-0
config.py
config.py
+1
-0
delete_temp_csv.py
delete_temp_csv.py
+2
-0
diaryCandidateSet.py
diaryCandidateSet.py
+2
-0
diaryTraining.py
diaryTraining.py
+2
-0
predictDiary.py
predictDiary.py
+5
-2
prepareData.py
prepareData.py
+2
-0
processData.py
processData.py
+2
-0
train.py
train.py
+2
-0
userProfile.py
userProfile.py
+10
-4
utils.py
utils.py
+3
-0
No files found.
aucCaculate.py
View file @
b02b5b23
from
eda.ml_tools.rocCurve
import
get_roc_curve
from
eda.ml_tools.rocCurve
import
get_roc_curve
import
pandas
as
pd
import
pandas
as
pd
from
config
import
*
from
config
import
*
...
...
config.py
View file @
b02b5b23
DIRECTORY_PATH
=
'/data2/models/'
DIRECTORY_PATH
=
'/data2/models/'
VALIDATION_DATE
=
'2018-08-05'
VALIDATION_DATE
=
'2018-08-05'
TEST_DATE
=
'2018-08-06'
TEST_DATE
=
'2018-08-06'
...
...
delete_temp_csv.py
View file @
b02b5b23
import
os
import
os
import
time
import
time
from
config
import
*
from
config
import
*
...
...
diaryCandidateSet.py
View file @
b02b5b23
import
pymysql
import
pymysql
import
pandas
as
pd
import
pandas
as
pd
from
utils
import
*
from
utils
import
*
...
...
diaryTraining.py
View file @
b02b5b23
import
xlearn
as
xl
import
xlearn
as
xl
from
config
import
*
from
config
import
*
...
...
predictDiary.py
View file @
b02b5b23
from
config
import
*
from
config
import
*
import
pandas
as
pd
import
pandas
as
pd
import
pickle
import
pickle
...
@@ -40,6 +42,7 @@ def transform_ffm_format(df, device_id):
...
@@ -40,6 +42,7 @@ def transform_ffm_format(df, device_id):
print
(
"ffm格式转化结束"
)
print
(
"ffm格式转化结束"
)
predict_file_name
=
DIRECTORY_PATH
+
"result/{0}_{1}DiaryTop3000.csv"
.
format
(
device_id
,
now
)
predict_file_name
=
DIRECTORY_PATH
+
"result/{0}_{1}DiaryTop3000.csv"
.
format
(
device_id
,
now
)
data
.
to_csv
(
predict_file_name
,
index
=
False
,
header
=
None
)
data
.
to_csv
(
predict_file_name
,
index
=
False
,
header
=
None
)
print
(
"ffm写到服务器"
)
return
predict_file_name
return
predict_file_name
...
@@ -86,7 +89,7 @@ def predict_save_to_redis(user_profile, instance):
...
@@ -86,7 +89,7 @@ def predict_save_to_redis(user_profile, instance):
def
router
(
device_id
):
def
router
(
device_id
):
user_profile
,
not_exist
=
fetch_user_profile
(
device_id
)
user_profile
,
not_exist
=
fetch_user_profile
(
device_id
)
if
not_exist
:
if
not_exist
==
1
:
print
(
'Sorry, we don
\'
t have you.'
)
print
(
'Sorry, we don
\'
t have you.'
)
else
:
else
:
predict
(
user_profile
)
predict
(
user_profile
)
...
@@ -97,7 +100,7 @@ if __name__ == "__main__":
...
@@ -97,7 +100,7 @@ if __name__ == "__main__":
while
True
:
while
True
:
start
=
time
.
time
()
start
=
time
.
time
()
empty
,
device_id_list
=
get_active_users
()
empty
,
device_id_list
=
get_active_users
()
if
empty
:
if
empty
==
1
:
time
.
sleep
(
60
)
time
.
sleep
(
60
)
else
:
else
:
old_device_id_list
=
pd
.
read_csv
(
DIRECTORY_PATH
+
"data_set_device_id.csv"
)[
"device_id"
]
.
values
.
tolist
()
old_device_id_list
=
pd
.
read_csv
(
DIRECTORY_PATH
+
"data_set_device_id.csv"
)[
"device_id"
]
.
values
.
tolist
()
...
...
prepareData.py
View file @
b02b5b23
from
utils
import
con_sql
from
utils
import
con_sql
import
datetime
import
datetime
...
...
processData.py
View file @
b02b5b23
import
time
import
time
from
prepareData
import
fetch_data
from
prepareData
import
fetch_data
from
utils
import
FFMFormatPandas
from
utils
import
FFMFormatPandas
...
...
train.py
View file @
b02b5b23
from
processData
import
*
from
processData
import
*
from
diaryTraining
import
*
from
diaryTraining
import
*
from
diaryCandidateSet
import
get_eachCityDiaryTop3000
from
diaryCandidateSet
import
get_eachCityDiaryTop3000
...
...
userProfile.py
View file @
b02b5b23
from
utils
import
con_sql
from
utils
import
con_sql
from
datetime
import
datetime
from
datetime
import
datetime
...
@@ -12,13 +14,15 @@ def get_active_users():
...
@@ -12,13 +14,15 @@ def get_active_users():
device_id_df
=
con_sql
(
sql
)
device_id_df
=
con_sql
(
sql
)
if
device_id_df
.
empty
:
if
device_id_df
.
empty
:
print
(
"当下这一分钟没有活跃用户,不需要预测"
)
print
(
"当下这一分钟没有活跃用户,不需要预测"
)
return
True
,
None
# 为了debug supervisor,修改了下面的return参数
return
1
,[
1
,
2
]
else
:
else
:
device_id_list
=
device_id_df
[
0
]
.
values
.
tolist
()
device_id_list
=
device_id_df
[
0
]
.
values
.
tolist
()
# 对device_id 进行去重
# 对device_id 进行去重
device_id_list
=
list
(
set
(
device_id_list
))
device_id_list
=
list
(
set
(
device_id_list
))
print
(
"成功获取当下一分钟内活跃用户"
)
print
(
"成功获取当下一分钟内活跃用户"
)
return
False
,
device_id_list
# 为了debug supervisor,修改了下面的return参数
return
"0"
,
device_id_list
def
fetch_user_profile
(
device_id
):
def
fetch_user_profile
(
device_id
):
...
@@ -26,11 +30,13 @@ def fetch_user_profile(device_id):
...
@@ -26,11 +30,13 @@ def fetch_user_profile(device_id):
user_profile
=
con_sql
(
sql
)
user_profile
=
con_sql
(
sql
)
if
user_profile
.
empty
:
if
user_profile
.
empty
:
print
(
"没有获取到该用户对应的city_id"
)
print
(
"没有获取到该用户对应的city_id"
)
return
{},
user_profile
.
empty
# 为了debug supervisor,修改了下面的return参数
return
{
1
:
2
},
1
else
:
else
:
user_profile
=
user_profile
.
rename
(
columns
=
{
0
:
"device_id"
,
1
:
"city_id"
})
user_profile
=
user_profile
.
rename
(
columns
=
{
0
:
"device_id"
,
1
:
"city_id"
})
print
(
"成功获取该用户对应的city_id"
)
print
(
"成功获取该用户对应的city_id"
)
user_profile_dict
=
{}
user_profile_dict
=
{}
for
i
in
user_profile
.
columns
:
for
i
in
user_profile
.
columns
:
user_profile_dict
[
i
]
=
user_profile
.
loc
[
0
,
i
]
user_profile_dict
[
i
]
=
user_profile
.
loc
[
0
,
i
]
return
user_profile_dict
,
user_profile
.
empty
# 为了debug supervisor,修改了下面的return参数
return
user_profile_dict
,
"0"
utils.py
View file @
b02b5b23
# encoding = "utf-8"
# encoding = "utf-8"
import
pymysql
import
pymysql
import
pandas
as
pd
import
pandas
as
pd
...
@@ -12,6 +14,7 @@ def con_sql(sql):
...
@@ -12,6 +14,7 @@ def con_sql(sql):
cursor
=
db
.
cursor
()
cursor
=
db
.
cursor
()
cursor
.
execute
(
sql
)
cursor
.
execute
(
sql
)
result
=
cursor
.
fetchall
()
result
=
cursor
.
fetchall
()
print
(
"成功从数据库获取数据"
)
df
=
pd
.
DataFrame
(
list
(
result
))
.
dropna
()
df
=
pd
.
DataFrame
(
list
(
result
))
.
dropna
()
db
.
close
()
db
.
close
()
return
df
return
df
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment