ML / ffm-baseline / Commits / 90cddbc5

delete prints

Commit 90cddbc5, authored Aug 27, 2018 by 张彦钊 (parent 2cd09bf7)
Showing 4 changed files with 10 additions and 34 deletions (+10 -34):

    diaryCandidateSet.py    +1   -4
    diaryQueueUpdate.py     +8  -28
    processData.py          +0   -1
    train.py                +1   -1
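This commit strips the pipeline's progress print() calls: some are deleted outright, the rest are left in place as commented-out lines. Purely as a reference sketch (not part of the commit), the same messages could be routed through Python's standard logging module and silenced by log level instead of being commented in and out. The body of get_cityList below is copied from the diff; the logger wiring is assumed, and con_sql and DIRECTORY_PATH are passed as parameters only to keep the sketch self-contained:

    import logging

    logger = logging.getLogger("ffm-baseline")
    logging.basicConfig(level=logging.INFO)  # raise to WARNING to silence progress output

    def get_cityList(sql, con_sql, DIRECTORY_PATH):
        # Same flow as get_cityList() in diaryCandidateSet.py, with the
        # deleted print replaced by a leveled log call.
        cityList = con_sql(sql)
        cityList.to_csv(DIRECTORY_PATH + "diaryTestSet/cityList.csv", index=False)
        cityList = cityList[0].values.tolist()
        logger.info("成功获取全国城市列表")  # "fetched the nationwide city list"
        return cityList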
diaryCandidateSet.py (view file @ 90cddbc5)

@@ -23,7 +23,6 @@ def get_allCitiesDiaryTop3000():
     allCitiesTop3000 = allCitiesTop3000.rename(columns={0: "city_id", 1: "cid"})
     allCitiesTop3000 = filter_cid(allCitiesTop3000)
     allCitiesTop3000.to_csv(DIRECTORY_PATH + "diaryTestSet/allCitiesDiaryTop3000.csv", index=False)
-    print("成功获取全国日记点击量TOP3000")
     return allCitiesTop3000

@@ -33,7 +32,6 @@ def get_cityList():
     cityList = con_sql(sql)
     cityList.to_csv(DIRECTORY_PATH + "diaryTestSet/cityList.csv", index=False)
     cityList = cityList[0].values.tolist()
-    print("成功获取全国城市列表")
     return cityList

@@ -58,7 +56,7 @@ def get_eachCityDiaryTop3000():
         file_name = DIRECTORY_PATH + "diaryTestSet/{0}DiaryTop3000.csv".format(i)
         data.to_csv(file_name, index=False)
         print("成功保存{}地区DiaryTop3000".format(i))

 def pool_method(city, sql, allCitiesTop3000):
     data = con_sql(sql)

@@ -72,7 +70,6 @@ def pool_method(city,sql,allCitiesTop3000):
         file_name = DIRECTORY_PATH + "diaryTestSet/{0}DiaryTop3000.csv".format(city)
         data.to_csv(file_name, index=False)
-        print("成功保存{}地区DiaryTop3000".format(city))

 # 多线程方法获取全国城市热门日记
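The trailing comment, 多线程方法获取全国城市热门日记 ("fetch every city's hot diaries with multithreading"), indicates that pool_method(city, sql, allCitiesTop3000) is fanned out across the city list. A minimal sketch of such a driver, assuming a thread-backed pool; the pool size and the simplified signature are assumptions, and pool_method is the function from this file:

    from multiprocessing.dummy import Pool  # thread pool with the multiprocessing API

    from diaryCandidateSet import pool_method  # assumed import path

    def multi_get_eachCityDiaryTop3000(cityList, sql, allCitiesTop3000):
        # Dispatch pool_method once per city; each call writes its own
        # diaryTestSet/{city}DiaryTop3000.csv, so the tasks are independent.
        pool = Pool(8)  # pool size is an assumption
        for city in cityList:
            pool.apply_async(pool_method, args=(city, sql, allCitiesTop3000))
        pool.close()
        pool.join()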
diaryQueueUpdate.py (view file @ 90cddbc5)

@@ -40,7 +40,7 @@ def feature_en(x_list, device_id):
     data["minute"] = data["minute"].astype("category")
     # 虽然预测y,但ffm转化需要y,并不影响预测结果
     data["y"] = 0
-    print("done 特征工程")
+    # print("done 特征工程")
     return data
@@ -52,7 +52,7 @@ def transform_ffm_format(df,queue_name,device_id):
     data = ffm_format_pandas.native_transform(df)
     predict_file_name = DIRECTORY_PATH + "result/{0}_{1}.csv".format(device_id, queue_name)
     data.to_csv(predict_file_name, index=False, header=None)
-    print("done ffm")
+    # print("done ffm")
     return predict_file_name
@@ -131,7 +131,7 @@ def update_dairy_queue(score_df,predict_score_df,total_video_id):
             new_queue.insert(i, j)
             i += 5
-        print("分数合并成功")
+        # print("分数合并成功")
         return new_queue
     # 如果没有视频日记
     else:
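The context around this change (new_queue.insert(i, j) followed by i += 5) shows update_dairy_queue splicing video diaries into the ranked queue every five slots. A standalone sketch of that pattern; the starting offset of i is not visible in the diff and is assumed to be 0:

    def interleave(ranked_queue, video_ids, stride=5):
        # Insert one video id every `stride` positions, mirroring the
        # new_queue.insert(i, j); i += 5 loop in update_dairy_queue.
        new_queue = list(ranked_queue)
        i = 0  # starting offset is an assumption
        for j in video_ids:
            new_queue.insert(i, j)
            i += stride
        return new_queue

    # interleave(["d1", "d2", "d3", "d4"], ["v1", "v2"])
    # -> ['v1', 'd1', 'd2', 'd3', 'd4', 'v2']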
@@ -139,7 +139,7 @@ def update_dairy_queue(score_df,predict_score_df,total_video_id):
         predict_score_df = predict_score_df.set_index(["cid"])
         score_df["score"] = score_df["score"] + predict_score_df["score"]
         score_df = score_df.sort_values(by="score", ascending=False)
-        print("分数合并成功1")
+        # print("分数合并成功1")
         return score_df.index.tolist()
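The merge in this hunk leans on pandas index alignment: both frames are indexed by cid, so the addition pairs scores row by row before the combined ranking is sorted descending. A self-contained sketch with made-up data (only the column and index names come from the diff):

    import pandas as pd

    score_df = pd.DataFrame({"cid": [101, 102], "score": [0.3, 0.9]}).set_index(["cid"])
    predict_score_df = pd.DataFrame({"cid": [101, 102], "score": [0.5, 0.1]}).set_index(["cid"])

    # pandas aligns on the shared "cid" index, so scores add row by row
    score_df["score"] = score_df["score"] + predict_score_df["score"]
    score_df = score_df.sort_values(by="score", ascending=False)
    print(score_df.index.tolist())  # [102, 101]: cids ranked by combined score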
@@ -159,26 +159,6 @@ def update_sql_dairy_queue(queue_name, diary_id,device_id, city_id):
     print("成功写入diaryid")

-# 更新前获取最新的native_queue
-def get_megacity_queue(device_id, city_id):
-    db = pymysql.connect(host='rm-m5e842126ng59jrv6.mysql.rds.aliyuncs.com', port=3306, user='doris',
-                         passwd='o5gbA27hXHHm', db='doris_prod')
-    cursor = db.cursor()
-    sql = "select megacity_queue from device_diary_queue " \
-          "where device_id = '{}' and city_id = '{}';".format(device_id, city_id)
-    cursor.execute(sql)
-    result = cursor.fetchall()
-    df = pd.DataFrame(list(result))
-    if not df.empty:
-        megacity_queue = df.loc[0, 0].split(",")
-        megacity_queue = list(map(lambda x: "diary|" + str(x), megacity_queue))
-        db.close()
-        print("成功获取megacity_queue")
-        return megacity_queue
-    else:
-        return False

 def get_queue(device_id, city_id, queue_name):
     db = pymysql.connect(host='rm-m5e842126ng59jrv6.mysql.rds.aliyuncs.com', port=3306, user='doris',
                          passwd='o5gbA27hXHHm', db='doris_prod')
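Both get_queue and the deleted get_megacity_queue hardcode the MySQL host and credentials. The commit does not touch this, but for reference, the same pymysql call can read them from the environment; the variable names below are hypothetical:

    import os
    import pymysql

    def connect_doris():
        # Credentials come from the environment instead of the source tree;
        # the defaults mirror the non-secret parts of the diff's connect() call.
        return pymysql.connect(
            host=os.environ["DORIS_HOST"],
            port=int(os.environ.get("DORIS_PORT", "3306")),
            user=os.environ["DORIS_USER"],
            passwd=os.environ["DORIS_PASSWORD"],
            db=os.environ.get("DORIS_DB", "doris_prod"),
        )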
@@ -190,13 +170,13 @@ def get_queue(device_id, city_id,queue_name):
     df = pd.DataFrame(list(result))
     if df.empty:
-        print("该用户对应的日记为空")
+        # print("该用户对应的日记为空")
         return False
     else:
         queue_list = df.loc[0, 0].split(",")
         queue_list = list(map(lambda x: "diary|" + str(x), queue_list))
         db.close()
-        print("成功获取queue")
+        # print("成功获取queue")
         return queue_list
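As the split(",") and "diary|" mapping in this hunk show, the queue column stores comma-separated diary ids that are rewritten into prefixed keys for the client. A tiny round-trip sketch of that encoding (both function names are hypothetical):

    def parse_queue(raw):
        # "16,23,42" -> ["diary|16", "diary|23", "diary|42"]
        return ["diary|" + str(x) for x in raw.split(",")]

    def serialize_queue(keys):
        # ["diary|16", "diary|23"] -> "16,23", the inverse for writing back
        return ",".join(k.split("|", 1)[1] for k in keys)

    assert parse_queue("16,23,42") == ["diary|16", "diary|23", "diary|42"]
    assert serialize_queue(parse_queue("16,23")) == "16,23"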
@@ -205,7 +185,7 @@ def pipe_line(queue_name, queue_arg, device_id,total_video_id):
     predict_score_df = save_result(queue_name, queue_arg, device_id)
     score_df = get_score(queue_arg)
     if score_df.empty:
-        print("获取的日记列表是空")
+        # print("获取的日记列表是空")
         return False
     else:
         score_df = score_df.rename(columns={0: "score", 1: "cid"})
@@ -224,7 +204,7 @@ def user_update(device_id, city_id, queue_name,data_set_cid,total_video_id):
         diary_queue = pipe_line(queue_name, queue_arg, device_id, total_video_id)
         if diary_queue:
             update_sql_dairy_queue(queue_name, diary_queue, device_id, city_id)
-            print("更新结束")
+            # print("更新结束")
         else:
             print("获取的日记列表是空,所以不更新日记队列")
     else:
processData.py (view file @ 90cddbc5)

@@ -65,7 +65,6 @@ def feature_en(data_start_date, data_end_date, validation_date, test_date):

 def ffm_transform(data, test_number, validation_number):
-    print("Start ffm transform")
     start = time.time()
     ffm_train = multiFFMFormatPandas()
train.py (view file @ 90cddbc5)

@@ -23,7 +23,7 @@ if __name__ == "__main__":
     multi_get_eachCityDiaryTop3000()
     end = time.time()
     print("获取各城市热门日记耗时{}分".format((end - start) / 60))
     print("end")