ML / ffm-baseline / Commits

Commit 109574b7
authored Aug 29, 2018 by 张彦钊
add testcase
parent 232cabf1

Showing 3 changed files with 35 additions and 30 deletions

config.py                      +6   -2
diaryUpdateOnlineOffline.py    +25  -14
local/testCases.py             +4   -14

config.py
@@ -20,9 +20,13 @@ LOCAL_EAGLE_HOST = "192.168.15.12"
 # local path
 LOCAL_DIRCTORY = "/Users/mac/utils/"
 # online diary queue host
 QUEUE_ONLINE_HOST = 'rm-m5e842126ng59jrv6.mysql.rds.aliyuncs.com'
 # local diary queue host
 LOCAL_HOST = 'rdsmaqevmuzj6jy.mysql.rds.aliyuncs.com'
 # # offline pkl
 # "/Users/mac/utils/ffm.pkl"
 # # offline prediction file
 # "/Users/mac/utils/result/{0}.csv".format(queue_name)
 # # offline model / prediction output files

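Note on the commented-out paths at the end of this hunk: they are simply LOCAL_DIRCTORY plus a fixed suffix, so the offline pkl and prediction-file locations can be derived from the constant rather than spelled out. A quick check (queue_name here is a hypothetical example value, not taken from this diff):

# LOCAL_DIRCTORY as added above; queue_name is a hypothetical example value
LOCAL_DIRCTORY = "/Users/mac/utils/"
queue_name = "native_queue"

print(LOCAL_DIRCTORY + "ffm.pkl")                            # /Users/mac/utils/ffm.pkl
print(LOCAL_DIRCTORY + "result/{0}.csv".format(queue_name))  # /Users/mac/utils/result/native_queue.csv
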

diaryUpdateOnlineOffline.py
@@ -52,17 +52,17 @@ def feature_en(x_list, device_id):
     data["minute"] = data["minute"].astype("category")
     # although y is being predicted, the ffm conversion needs a y column; it does not affect the prediction result
     data["y"] = 0
-    # print("done 特征工程")
+    print("done 特征工程")
     return data


 # load ffm.pkl and convert the data above into ffm format
 def transform_ffm_format(df, queue_name, device_id):
-    with open(DIRECTORY_PATH + "ffm.pkl", "rb") as f:
+    with open(path + "ffm.pkl", "rb") as f:
         ffm_format_pandas = pickle.load(f)
     data = ffm_format_pandas.native_transform(df)
-    predict_file_name = DIRECTORY_PATH + "result/{0}_{1}.csv".format(device_id, queue_name)
+    predict_file_name = path + "result/{0}_{1}.csv".format(device_id, queue_name)
     data.to_csv(predict_file_name, index=False, header=None)
     # print("done ffm")
     return predict_file_name

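On the dummy label: the converter pickled in ffm.pkl is project-specific, but the libffm-style text that FFM tooling such as xLearn consumes puts a label at the start of every line, which is presumably why feature_en sets data["y"] = 0 even at prediction time (the value itself does not influence the predicted scores). A tiny illustration with invented field/feature indices:

# a single line in that format: "<label> <field>:<feature>:<value> ...".
# the indices below are invented; only the leading label matters for this point.
sample_ffm_line = "0 0:12:1 1:57:1 2:3:1"
label, *features = sample_ffm_line.split()
print(label)     # '0'  -- the placeholder added by data["y"] = 0
print(features)  # ['0:12:1', '1:57:1', '2:3:1']
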
@@ -74,12 +74,12 @@ def predict(queue_name,queue_arg,device_id):
     ffm_model = xl.create_ffm()
     ffm_model.setTest(data_file_path)
     ffm_model.setSigmoid()
-    ffm_model.predict(DIRECTORY_PATH + "model.out", DIRECTORY_PATH + "result/output{0}_{1}.csv".format(device_id, queue_name))
+    ffm_model.predict(path + "model.out", path + "result/output{0}_{1}.csv".format(device_id, queue_name))


 def save_result(queue_name, queue_arg, device_id):
-    score_df = pd.read_csv(DIRECTORY_PATH + "result/output{0}_{1}.csv".format(device_id, queue_name), header=None)
+    score_df = pd.read_csv(path + "result/output{0}_{1}.csv".format(device_id, queue_name), header=None)
     mm_scaler = MinMaxScaler()
     mm_scaler.fit(score_df)
     score_df = pd.DataFrame(mm_scaler.transform(score_df))

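save_result min-max scales the raw sigmoid scores that xlearn writes to its output CSV. A standalone sketch of just that normalization step, with invented score values, mirroring the MinMaxScaler usage above:

import pandas as pd
from sklearn.preprocessing import MinMaxScaler

# one score per row, like xlearn's output file (values invented for illustration)
score_df = pd.DataFrame([0.12, 0.47, 0.93, 0.30])

mm_scaler = MinMaxScaler()
mm_scaler.fit(score_df)
score_df = pd.DataFrame(mm_scaler.transform(score_df))
print(score_df[0].tolist())  # approximately [0.0, 0.432, 1.0, 0.222] -- rescaled into [0, 1]
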
@@ -105,7 +105,11 @@ def save_result(queue_name,queue_arg,device_id):
 def get_score(queue_arg):
     if flag:
         db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='eagle')
     else:
         db = pymysql.connect(host=LOCAL_HOST, port=3306, user='work', passwd='workwork', db='zhengxing_tes')
     cursor = db.cursor()
     # strip the "diary|" prefix from diary_id
     diary_list = tuple(list(map(lambda x: x[6:], queue_arg[2])))

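The slice in the lambda works because the prefix being stripped, "diary|", is exactly six characters long, so x[6:] keeps only the numeric id. A quick check with invented sample entries:

# invented sample entries standing in for queue_arg[2]
queue_ids = ["diary|16215", "diary|16216", "diary|16217"]
diary_list = tuple(map(lambda x: x[6:], queue_ids))
print(diary_list)     # ('16215', '16216', '16217')
print(len("diary|"))  # 6 -- which is why x[6:] drops exactly the prefix
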
@@ -163,9 +167,12 @@ def update_dairy_queue(score_df,predict_score_df,total_video_id):
 def update_sql_dairy_queue(queue_name, diary_id, device_id, city_id):
+    if flag:
+        db = pymysql.connect(host=QUEUE_ONLINE_HOST, port=3306, user='doris', passwd='o5gbA27hXHHm', db='doris_prod')
+    else:
+        db = pymysql.connect(host=LOCAL_HOST, port=3306, user='work', passwd='workwork', db='doris_test')
-    db = pymysql.connect(host='rm-m5e842126ng59jrv6.mysql.rds.aliyuncs.com', port=3306, user='doris', passwd='o5gbA27hXHHm', db='doris_prod')
     cursor = db.cursor()
     id_str = str(diary_id[0])
     for i in range(1, len(diary_id)):

@@ -205,8 +212,13 @@ def queue_compare(old_list, new_list):
 def get_queue(device_id, city_id, queue_name):
-    db = pymysql.connect(host='rm-m5e842126ng59jrv6.mysql.rds.aliyuncs.com', port=3306, user='doris', passwd='o5gbA27hXHHm', db='doris_prod')
+    if flag:
+        db = pymysql.connect(host=QUEUE_ONLINE_HOST, port=3306, user='doris', passwd='o5gbA27hXHHm', db='doris_prod')
+    else:
+        db = pymysql.connect(host=LOCAL_HOST, port=3306, user='work', passwd='workwork', db='doris_test')
     cursor = db.cursor()
     sql = "select {} from device_diary_queue " \
           "where device_id = '{}' and city_id = '{}';".format(queue_name, device_id, city_id)

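The same if flag / else connection block now appears in get_score, update_sql_dairy_queue and get_queue. If that duplication ever becomes a burden, it could be folded into one small helper along these lines. Purely a sketch: get_doris_db and the dict grouping are hypothetical, while the host constants, credentials and database names are the ones visible in this diff (and assumed importable from config.py, where this commit defines them):

import pymysql
from config import QUEUE_ONLINE_HOST, LOCAL_HOST  # constants shown in config.py above

# connection settings exactly as they appear in the if/else branches of this diff
DORIS_ONLINE = dict(host=QUEUE_ONLINE_HOST, port=3306, user='doris', passwd='o5gbA27hXHHm', db='doris_prod')
DORIS_LOCAL = dict(host=LOCAL_HOST, port=3306, user='work', passwd='workwork', db='doris_test')

def get_doris_db(flag):
    # hypothetical helper: choose the online or local queue database in one place
    return pymysql.connect(**(DORIS_ONLINE if flag else DORIS_LOCAL))

# get_queue / update_sql_dairy_queue could then start with:
# db = get_doris_db(flag)
# cursor = db.cursor()
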
@@ -271,19 +283,18 @@ def multi_proecess_update(device_id, city_id, data_set_cid,total_video_id):
 if __name__ == "__main__":
     warnings.filterwarnings("ignore")
     flag = False
+    path = LOCAL_DIRCTORY
     # the ip below is the online server's ip
     if socket.gethostbyname(socket.gethostname()) == '10.31.242.83':
         flag = True
+        path = DIRECTORY_PATH
     total_number = 0
     # add a cache for the diary video list
     cache_video_id = []
     cache_device_city_list = []
     update_queue_numbers = 0
     while True:
-        if flag:
-            data_set_cid = pd.read_csv(DIRECTORY_PATH + "data_set_cid.csv")["cid"].values.tolist()
-        else:
-            data_set_cid = pd.read_csv(LOCAL_DIRCTORY + "data_set_cid.csv")["cid"].values.tolist()
+        data_set_cid = pd.read_csv(path + "data_set_cid.csv")["cid"].values.tolist()
         total_video_id = get_video_id(cache_video_id)
         cache_video_id = total_video_id
         device_city_list = get_active_users(flag)


local/testCases.py
@@ -9,11 +9,10 @@ def get_video_id():
     sql = "select diary_id from feed_diary_boost;"
     cursor.execute(sql)
     result = cursor.fetchall()
-    df = pd.DataFrame(list(result))
-    video_id = df[0].values.tolist()
-    print(video_id[:10])
+    df = pd.DataFrame(list(result))
+    df.to_csv('/data2/models/video_diary_id.csv', index=None)
+    print("成功")
     db.close()
     return video_id


 def queue():
     db = pymysql.connect(host='rm-m5e842126ng59jrv6.mysql.rds.aliyuncs.com', port=3306, user='doris',

@@ -29,12 +28,4 @@ def queue():
     return all


 if __name__ == "__main__":
-    all = queue()
-    video = get_video_id()
-    video_id = []
-    x = 1
-    while x < len(all):
-        video_id.append(all[x])
-        x += 3
-    a = list(set(video) & set(video_id))
-    print(a == video_id)
\ No newline at end of file
+    get_video_id()

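For reference, the block deleted from __main__ walked through all starting at index 1 in steps of 3 and intersected the result with the diary ids from get_video_id(); the same selection can be written as a slice. A small reproduction with invented data (all is renamed all_items here so the builtin is not shadowed):

# invented stand-ins for the values the old test pulled from queue() and get_video_id()
all_items = ["16215", "101", "200", "16216", "102", "201", "16217"]
video = ["16216", "16217", "999"]

# old loop: x = 1; while x < len(all): video_id.append(all[x]); x += 3
video_id = all_items[1::3]            # ['101', '102']
a = list(set(video) & set(video_id))  # set intersection; element order not guaranteed
print(a == video_id)                  # what the removed test printed
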