Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
2cf954ad
Commit
2cf954ad
authored
Mar 17, 2020
by
赵威
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
try all
parent
8e14a43a
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
152 additions
and
16 deletions
+152
-16
new_tag3_cold_start.py
eda/smart_rank/new_tag3_cold_start.py
+152
-16
No files found.
eda/smart_rank/new_tag3_cold_start.py
View file @
2cf954ad
...
@@ -10,6 +10,24 @@ def merge_lists(*lol):
...
@@ -10,6 +10,24 @@ def merge_lists(*lol):
return
[
i
for
i
in
chain
(
*
a
)
if
i
is
not
None
]
return
[
i
for
i
in
chain
(
*
a
)
if
i
is
not
None
]
def get_result(hits):
    """Group hit ids by their first second-demand and merge the groups.

    Each element of ``hits`` is expected to be a mapping with a ``"_source"``
    mapping inside it. Hits whose ``"second_demands"`` entry is missing or
    empty are ignored; for the others, ``_source["id"]`` is filed under the
    first element of ``second_demands``.

    The id groups are ordered from largest to smallest (ties keep the order
    in which their key was first seen) and handed, as a single list of
    lists, to ``merge_lists``.

    Args:
        hits: Iterable of hit mappings (presumably the ``hits.hits`` array of
            an Elasticsearch response -- confirm against callers).

    Returns:
        Whatever ``merge_lists`` returns for the ordered list of id groups.

    Raises:
        KeyError: If a hit lacks ``"_source"``, or a hit with second demands
            lacks ``"id"``.
    """
    ids_by_demand = {}
    for hit in hits:
        source = hit["_source"]
        second_demands = source.get("second_demands")
        if not second_demands:
            # Nothing to group on: skip hits without any second demand.
            continue
        # Only the first second-demand is used as the grouping key.
        ids_by_demand.setdefault(second_demands[0], []).append(source["id"])

    # sorted() is stable, so equally sized groups keep first-seen order.
    groups = sorted(ids_by_demand.values(), key=len, reverse=True)
    return merge_lists(groups)
keywords
=
[
keywords
=
[
"瘦脸"
,
"双眼皮"
,
"补水"
,
"美白嫩肤"
,
"瘦小腿"
,
"除皱"
,
"隆鼻"
,
"牙齿矫正"
,
"脱毛"
,
"祛斑"
,
"控油"
,
"面部祛脂"
,
"隆胸"
,
"垫鼻基底"
,
"缩鼻翼"
,
"生头发"
,
"开眼角"
,
"瘦肩"
,
"祛痘"
,
"瘦脸"
,
"双眼皮"
,
"补水"
,
"美白嫩肤"
,
"瘦小腿"
,
"除皱"
,
"隆鼻"
,
"牙齿矫正"
,
"脱毛"
,
"祛斑"
,
"控油"
,
"面部祛脂"
,
"隆胸"
,
"垫鼻基底"
,
"缩鼻翼"
,
"生头发"
,
"开眼角"
,
"瘦肩"
,
"祛痘"
,
"缩毛孔"
,
"轮廓改善"
,
"除眼底细纹"
,
"眼部修复"
,
"垫下巴"
,
"颧骨内推"
,
"除法令纹"
,
"缩咬肌"
,
"瘦臀部"
,
"丰眼窝"
,
"瘦手臂"
,
"丰唇"
,
"瘦腰腹"
,
"瘦大腿"
,
"填充泪沟"
,
"洁面"
,
"除面部细纹"
,
"缩毛孔"
,
"轮廓改善"
,
"除眼底细纹"
,
"眼部修复"
,
"垫下巴"
,
"颧骨内推"
,
"除法令纹"
,
"缩咬肌"
,
"瘦臀部"
,
"丰眼窝"
,
"瘦手臂"
,
"丰唇"
,
"瘦腰腹"
,
"瘦大腿"
,
"填充泪沟"
,
"洁面"
,
"除面部细纹"
,
...
@@ -132,23 +150,141 @@ def get_es_diary(keywords, city_tag_id=-1, version=False):
...
@@ -132,23 +150,141 @@ def get_es_diary(keywords, city_tag_id=-1, version=False):
q
[
"sort"
]
=
sort_list
q
[
"sort"
]
=
sort_list
q
[
"_source"
]
=
{
"includes"
:
[
"id"
,
"second_demands"
]}
q
[
"_source"
]
=
{
"includes"
:
[
"id"
,
"second_demands"
]}
es_res
=
es_query
(
"diary"
,
q
,
offset
=
0
,
size
=
5000
)
es_res
=
es_query
(
"diary"
,
q
,
offset
=
0
,
size
=
5000
)
diary_dict
=
{}
return
get_result
(
es_res
[
"hits"
][
"hits"
])
for
diary_info
in
es_res
[
"hits"
][
"hits"
]:
second_demands
=
diary_info
[
"_source"
]
.
get
(
"second_demands"
,
[])
if
second_demands
and
len
(
second_demands
)
>
0
:
def
get_es_tractate
(
keywords
,
version
=
False
):
id
=
diary_info
[
"_source"
][
"id"
]
q
=
{}
sd
=
second_demands
[
0
]
should_list
=
[{
if
sd
not
in
diary_dict
:
"terms"
:
{
diary_dict
[
sd
]
=
[
id
]
"first_demands"
:
keywords
}
},
{
"terms"
:
{
"second_demands"
:
keywords
}
},
{
"terms"
:
{
"first_solutions"
:
keywords
}
},
{
"terms"
:
{
"second_solutions"
:
keywords
}
},
{
"terms"
:
{
"positions"
:
keywords
}
},
{
"terms"
:
{
"second_positions"
:
keywords
}
},
{
"terms"
:
{
"tags_v3"
:
keywords
}
}]
q
[
"query"
]
=
{
"bool"
:
{
"filter"
:
[{
"term"
:
{
"is_online"
:
True
}
},
{
"terms"
:
{
"content_level"
:
[
6
,
5
,
4
,
3.5
,
3
]
}
}],
"should"
:
should_list
,
"minimum_should_match"
:
1
}
}
q
[
"_source"
]
=
{
"includes"
:
[
"id"
,
"second_demands"
]}
if
version
:
q
[
"sort"
]
=
[{
"is_video"
:
{
"order"
:
"asc"
}
},
{
"good_click_tractate_score"
:
{
"order"
:
"desc"
}
},
{
"good_click"
:
{
"order"
:
"desc"
}
}]
else
:
else
:
diary_dict
[
sd
]
.
append
(
id
)
q
[
"sort"
]
=
[{
"is_video"
:
{
"order"
:
"asc"
}},
{
"tractate_score"
:
{
"order"
:
"desc"
}},
{
"good_click"
:
{
"order"
:
"desc"
}}]
res_pair
=
sorted
(
diary_dict
.
items
(),
key
=
lambda
x
:
len
(
x
[
1
]),
reverse
=
True
)
es_res
=
es_query
(
"tractate"
,
q
,
offset
=
0
,
size
=
5000
)
res_list
=
[]
return
get_result
(
es_res
[
"hits"
][
"hits"
])
for
_
,
v
in
res_pair
:
res_list
.
append
(
v
)
return
merge_lists
(
res_list
)
def get_es_answer(keywords, version=False):
    """Query the "answer" index for online answers matching ``keywords``.

    The query keeps only documents with ``content_length >= 30``,
    ``is_online`` true and a ``content_level`` among "6", "5", "4", "3.5",
    "3", and requires at least one of the demand / solution / position / tag
    fields to match one of ``keywords``. Only ``id`` and ``second_demands``
    are requested from the source.

    Args:
        keywords: Terms matched against each of the tag-like fields.
        version: When truthy, sort by ``smart_rank_v3``; otherwise by
            ``smart_rank_v2``. ``has_picture`` is always the first sort key
            and ``good_click`` the last, all descending.

    Returns:
        The result of ``get_result`` applied to the response's
        ``["hits"]["hits"]`` list.
    """
    rank_field = "smart_rank_v3" if version else "smart_rank_v2"

    # Every field below gets its own "terms" clause against the same keywords.
    matched_fields = (
        "first_demands",
        "second_demands",
        "first_solutions",
        "second_solutions",
        "positions",
        "second_positions",
        "tags_v3",
    )

    query = {
        "query": {
            "bool": {
                "filter": [
                    {"range": {"content_length": {"gte": 30}}},
                    {"term": {"is_online": True}},
                    {"terms": {"content_level": ["6", "5", "4", "3.5", "3"]}},
                ],
                "should": [{"terms": {field: keywords}} for field in matched_fields],
                "minimum_should_match": 1,
            }
        },
        "_source": {"includes": ["id", "second_demands"]},
        "sort": [
            {"has_picture": {"order": "desc"}},
            {rank_field: {"order": "desc"}},
            {"good_click": {"order": "desc"}},
        ],
    }

    response = es_query("answer", query, offset=0, size=5000)
    return get_result(response["hits"]["hits"])
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
ids
=
get_es_diary
(
keywords
)
print
(
get_es_diary
(
keywords
)[:
10
])
print
(
ids
)
print
(
get_es_tractate
(
keywords
)[:
10
])
print
(
get_es_answer
(
keywords
)[:
10
])
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment