Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
e5a2d469
Commit
e5a2d469
authored
Aug 28, 2018
by
张彦钊
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
删除获取热门日记集合
parent
0eacdc06
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
4 additions
and
9 deletions
+4
-9
diaryCandidateSet.py
diaryCandidateSet.py
+4
-4
train.py
train.py
+0
-5
No files found.
diaryCandidateSet.py
View file @
e5a2d469
...
...
@@ -17,7 +17,7 @@ def filter_cid(df):
def
get_allCitiesDiaryTop3000
():
# 获取全国点击量TOP3000日记
sql
=
"select city_id,cid from data_feed_click "
\
sql
=
"select city_id,cid from data_feed_click
2
"
\
"where cid_type = 'diary' group by cid order by max(click_count_choice) desc limit 3000"
allCitiesTop3000
=
con_sql
(
sql
)
allCitiesTop3000
=
allCitiesTop3000
.
rename
(
columns
=
{
0
:
"city_id"
,
1
:
"cid"
})
...
...
@@ -28,7 +28,7 @@ def get_allCitiesDiaryTop3000():
def
get_cityList
():
# 获取全国城市列表
sql
=
"select distinct city_id from data_feed_click"
sql
=
"select distinct city_id from data_feed_click
2
"
cityList
=
con_sql
(
sql
)
cityList
.
to_csv
(
DIRECTORY_PATH
+
"diaryTestSet/cityList.csv"
,
index
=
False
)
cityList
=
cityList
[
0
]
.
values
.
tolist
()
...
...
@@ -40,7 +40,7 @@ def get_eachCityDiaryTop3000():
cityList
=
get_cityList
()
allCitiesTop3000
=
get_allCitiesDiaryTop3000
()
for
i
in
cityList
:
sql
=
"select city_id,cid from data_feed_click "
\
sql
=
"select city_id,cid from data_feed_click
2
"
\
"where cid_type = 'diary' and city_id = '{0}' group by cid "
\
"order by max(click_count_choice) desc limit 3000"
.
format
(
i
)
data
=
con_sql
(
sql
)
...
...
@@ -78,7 +78,7 @@ def multi_get_eachCityDiaryTop3000(processes=8):
allCitiesTop3000
=
get_allCitiesDiaryTop3000
()
pool
=
Pool
(
processes
)
for
city
in
city_list
:
sql
=
"select city_id,cid from data_feed_click "
\
sql
=
"select city_id,cid from data_feed_click
2
"
\
"where cid_type = 'diary' and city_id = '{0}' group by cid "
\
"order by max(click_count_choice) desc limit 3000"
.
format
(
city
)
...
...
train.py
View file @
e5a2d469
...
...
@@ -18,11 +18,6 @@ if __name__ == "__main__":
train
()
end_train
=
time
.
time
()
print
(
"训练模型耗时{}分"
.
format
((
end_train
-
start_train
)
/
60
))
print
(
'---------------prepare candidates--------------'
)
start
=
time
.
time
()
multi_get_eachCityDiaryTop3000
()
end
=
time
.
time
()
print
(
"获取各城市热门日记耗时{}分"
.
format
((
end
-
start
)
/
60
))
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment