Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
3a6de1e5
Commit
3a6de1e5
authored
Aug 10, 2018
by
高雅喆
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'master' of git.wanmeizhensuo.com:ML/ffm-baseline
add a comment and new a class ClkCidUidRate
parents
62b8bc34
11c1c9b8
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
17 additions
and
14 deletions
+17
-14
diaryCandidateSet.py
diaryCandidateSet.py
+0
-4
predictDiary.py
predictDiary.py
+5
-3
processData.py
processData.py
+7
-3
train.py
train.py
+5
-4
No files found.
diaryCandidateSet.py
View file @
3a6de1e5
...
@@ -7,12 +7,8 @@ from config import *
...
@@ -7,12 +7,8 @@ from config import *
# 候选集cid只能从训练数据集cid中选择
# 候选集cid只能从训练数据集cid中选择
def
filter_cid
(
df
):
def
filter_cid
(
df
):
data_set_cid
=
pd
.
read_csv
(
DIRECTORY_PATH
+
"data_set_cid.csv"
)[
"cid"
]
.
values
.
tolist
()
data_set_cid
=
pd
.
read_csv
(
DIRECTORY_PATH
+
"data_set_cid.csv"
)[
"cid"
]
.
values
.
tolist
()
print
(
"过滤前样本大小:"
)
print
(
df
.
shape
)
if
not
df
.
empty
:
if
not
df
.
empty
:
df
=
df
.
loc
[
df
[
"cid"
]
.
isin
(
data_set_cid
)]
df
=
df
.
loc
[
df
[
"cid"
]
.
isin
(
data_set_cid
)]
print
(
"过滤后样本大小:"
)
print
(
df
.
shape
)
return
df
return
df
...
...
predictDiary.py
View file @
3a6de1e5
...
@@ -94,17 +94,19 @@ def router(device_id):
...
@@ -94,17 +94,19 @@ def router(device_id):
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
# TODO 如果耗时小于一分钟,下一次取到的device_id和上一次相同
# TODO 如果耗时小于一分钟,下一次取到的device_id和上一次相同
while
True
:
while
True
:
start
=
time
.
time
()
start
=
time
.
time
()
empty
,
device_id_list
=
get_active_users
()
empty
,
device_id_list
=
get_active_users
()
if
empty
:
if
empty
:
time
.
sleep
(
10
)
time
.
sleep
(
10
)
else
:
else
:
old_device_id_list
=
pd
.
read_csv
(
DIRECTORY_PATH
+
"data_set_device_id.csv"
)[
"device_id"
]
.
values
.
tolist
()
for
device_id
in
device_id_list
:
for
device_id
in
device_id_list
:
if
device_id
in
old_device_id_list
:
router
(
device_id
)
router
(
device_id
)
else
:
print
(
"该用户不是老用户,不能预测"
)
end
=
time
.
time
()
end
=
time
.
time
()
time_cost
=
(
end
-
start
)
time_cost
=
(
end
-
start
)
print
(
"
预测
耗时{}秒"
.
format
(
time_cost
))
print
(
"耗时{}秒"
.
format
(
time_cost
))
processData.py
View file @
3a6de1e5
...
@@ -60,6 +60,13 @@ def feature_en():
...
@@ -60,6 +60,13 @@ def feature_en():
print
(
cid_df
.
head
(
2
))
print
(
cid_df
.
head
(
2
))
cid_df
.
to_csv
(
DIRECTORY_PATH
+
"data_set_cid.csv"
,
index
=
False
)
cid_df
.
to_csv
(
DIRECTORY_PATH
+
"data_set_cid.csv"
,
index
=
False
)
# 将device_id 保存。目的是为了判断预测的device_id是否在这个集合里,如果不在,不需要预测
data_set_device_id
=
data
[
"device_id"
]
.
unique
()
device_id_df
=
pd
.
DataFrame
()
device_id_df
[
'device_id'
]
=
data_set_device_id
print
(
"data_set_device_id :"
)
print
(
device_id_df
.
head
(
2
))
device_id_df
.
to_csv
(
DIRECTORY_PATH
+
"data_set_device_id.csv"
,
index
=
False
)
return
data
,
test_number
,
validation_number
return
data
,
test_number
,
validation_number
...
@@ -99,8 +106,5 @@ def ffm_transform(data, test_number, validation_number):
...
@@ -99,8 +106,5 @@ def ffm_transform(data, test_number, validation_number):
train
.
to_csv
(
DIRECTORY_PATH
+
"train{0}-{1}.csv"
.
format
(
DATA_START_DATE
,
VALIDATION_DATE
),
index
=
False
,
header
=
None
)
train
.
to_csv
(
DIRECTORY_PATH
+
"train{0}-{1}.csv"
.
format
(
DATA_START_DATE
,
VALIDATION_DATE
),
index
=
False
,
header
=
None
)
if
__name__
==
"__main__"
:
data_fe
=
feature_en
()
ffm_transform
(
data_fe
)
train.py
View file @
3a6de1e5
...
@@ -5,9 +5,10 @@ from diaryCandidateSet import get_eachCityDiaryTop3000
...
@@ -5,9 +5,10 @@ from diaryCandidateSet import get_eachCityDiaryTop3000
# 把数据获取、特征转换、模型训练的模型串联在一起
# 把数据获取、特征转换、模型训练的模型串联在一起
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
data
_fe
=
feature_en
()
data
,
test_number
,
validation_number
=
feature_en
()
ffm_transform
(
data
_fe
)
ffm_transform
(
data
,
test_number
,
validation_number
)
train
()
train
()
print
(
'---------------prepare candidates--------------'
)
print
(
"end"
)
get_eachCityDiaryTop3000
()
# print('---------------prepare candidates--------------')
# get_eachCityDiaryTop3000()
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment