Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
78d37e75
Commit
78d37e75
authored
Aug 29, 2018
by
张彦钊
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
add print
parent
9202dafb
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
11 additions
and
9 deletions
+11
-9
diary-training.py
diary-training.py
+8
-6
utils.py
utils.py
+3
-3
No files found.
diary-training.py
View file @
78d37e75
...
@@ -4,6 +4,8 @@ import pandas as pd
...
@@ -4,6 +4,8 @@ import pandas as pd
from
sklearn.utils
import
shuffle
from
sklearn.utils
import
shuffle
import
numpy
as
np
import
numpy
as
np
import
xlearn
as
xl
import
xlearn
as
xl
# 从数据库的表里获取数据,并转化成df格式
# 从数据库的表里获取数据,并转化成df格式
def
con_sql
(
sql
):
def
con_sql
(
sql
):
db
=
pymysql
.
connect
(
host
=
'10.66.157.22'
,
port
=
4000
,
user
=
'root'
,
passwd
=
'3SYz54LS9#^9sBvC'
,
db
=
'jerry_test'
)
db
=
pymysql
.
connect
(
host
=
'10.66.157.22'
,
port
=
4000
,
user
=
'root'
,
passwd
=
'3SYz54LS9#^9sBvC'
,
db
=
'jerry_test'
)
...
@@ -48,13 +50,13 @@ data = click.append(exposure)
...
@@ -48,13 +50,13 @@ data = click.append(exposure)
print
(
"done 合并点击表和曝光表"
)
print
(
"done 合并点击表和曝光表"
)
print
(
data
.
head
(
2
))
print
(
data
.
head
(
2
))
# 从time特征中抽取hour、weekday
# 从time特征中抽取hour、weekday
data
[
"hour"
]
=
data
[
"time"
]
.
apply
(
lambda
x
:
datetime
.
datetime
.
fromtimestamp
(
x
)
.
hour
)
#
data["hour"] = data["time"].apply(lambda x:datetime.datetime.fromtimestamp(x).hour)
data
[
"weekday"
]
=
data
[
"time"
]
.
apply
(
lambda
x
:
datetime
.
datetime
.
fromtimestamp
(
x
)
.
weekday
())
#
data["weekday"] = data["time"].apply(lambda x:datetime.datetime.fromtimestamp(x).weekday())
# 数值是0的特征会被ffm格式删除,经过下面的处理后,没有数值是0的特征
# 数值是0的特征会被ffm格式删除,经过下面的处理后,没有数值是0的特征
data
.
loc
[
data
[
"hour"
]
==
0
]
=
24
#
data.loc[data["hour"]==0] = 24
data
.
loc
[
data
[
"weekday"
]
==
0
]
=
7
#
data.loc[data["weekday"]==0] = 7
data
[
"hour"
]
=
data
[
"hour"
]
.
astype
(
"category"
)
#
data["hour"] = data["hour"].astype("category")
data
[
"weekday"
]
=
data
[
"weekday"
]
.
astype
(
"category"
)
#
data["weekday"] = data["weekday"].astype("category")
data
=
data
.
drop
(
"time"
,
axis
=
1
)
data
=
data
.
drop
(
"time"
,
axis
=
1
)
print
(
"成功从time特征中抽取hour、weekday"
)
print
(
"成功从time特征中抽取hour、weekday"
)
print
(
data
.
head
(
2
))
print
(
data
.
head
(
2
))
...
...
utils.py
View file @
78d37e75
...
@@ -59,13 +59,13 @@ def queue_compare(old_list, new_list):
...
@@ -59,13 +59,13 @@ def queue_compare(old_list, new_list):
x_dict
=
dict
(
zip
(
old_list
,
temp
))
x_dict
=
dict
(
zip
(
old_list
,
temp
))
temp
=
list
(
range
(
len
(
new_list
)))
temp
=
list
(
range
(
len
(
new_list
)))
y_dict
=
dict
(
zip
(
new_list
,
temp
))
y_dict
=
dict
(
zip
(
new_list
,
temp
))
i
=
0
i
=
0
for
key
in
x_dict
.
keys
():
for
key
in
x_dict
.
keys
():
if
x_dict
[
key
]
!=
y_dict
[
key
]:
if
x_dict
[
key
]
!=
y_dict
[
key
]:
i
+=
1
i
+=
1
print
(
"日记队列更新前日记总个数{},位置发生变化个数{},发生变化率{}
%
"
.
format
(
len
(
old_list
),
i
,
if
i
>
0
:
round
(
i
/
len
(
old_list
)
*
100
),
2
))
print
(
"日记队列更新前日记总个数{},位置发生变化个数{},发生变化率{}
%
"
.
format
(
len
(
old_list
),
i
,
round
(
i
/
len
(
old_list
)
*
100
),
2
))
def
move_file
():
def
move_file
():
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment