Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
c37ba3aa
Commit
c37ba3aa
authored
Mar 17, 2020
by
赵威
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'offic' into 'master'
Offic See merge request
!44
parents
35cd9025
4aa782a8
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
302 additions
and
0 deletions
+302
-0
new_tag3_cold_start.py
eda/smart_rank/new_tag3_cold_start.py
+302
-0
No files found.
eda/smart_rank/new_tag3_cold_start.py
0 → 100644
View file @
c37ba3aa
import
json
from
itertools
import
chain
,
zip_longest
import
redis
from
es_tool
import
es_query
def merge_lists(*lol):
    """Round-robin merge several lists into one flat list.

    Takes the first element of every list, then the second of every
    list, and so on; shorter lists simply run out early.  The
    ``zip_longest`` padding (``None``) is dropped, so ``None`` entries
    inside the input lists would be dropped as well.
    """
    interleaved = chain.from_iterable(zip_longest(*lol))
    return [item for item in interleaved if item is not None]
def get_result(hits):
    """Bucket ES hit ids by their first second-demand tag and interleave.

    Each hit's id is grouped under the first entry of its
    ``_source.second_demands`` list (hits without second demands are
    skipped).  Buckets are ordered largest-first and then round-robin
    merged via ``merge_lists`` so every demand is represented near the
    head of the result.

    Args:
        hits: list of ES hit dicts, each carrying a ``"_source"`` dict
            with ``"id"`` and optionally ``"second_demands"``.

    Returns:
        Flat list of ids interleaved across demand buckets.
    """
    grouped = {}
    for hit in hits:
        source = hit["_source"]
        demands = source.get("second_demands", [])
        if demands:  # keep only hits tagged with at least one second demand
            doc_id = source["id"]  # renamed from `id` to avoid shadowing the builtin
            grouped.setdefault(demands[0], []).append(doc_id)
    # Largest buckets first; sort is stable, so ties keep insertion order
    # exactly as the original items()-based sort did.
    ordered_buckets = sorted(grouped.values(), key=len, reverse=True)
    return merge_lists(*ordered_buckets)
# Seed cosmetic-procedure keyword tags used to fetch cold-start content
# from ES (matched against demand/solution/position/tags_v3 fields).
# NOTE(review): maintained by hand — presumably mirrors the tag_v3
# vocabulary; confirm against the tag source of truth before editing.
keywords = [
    "瘦脸", "双眼皮", "补水", "美白嫩肤", "瘦小腿", "除皱", "隆鼻", "牙齿矫正",
    "脱毛", "祛斑", "控油", "面部祛脂", "隆胸", "垫鼻基底", "缩鼻翼", "生头发",
    "开眼角", "瘦肩", "祛痘", "缩毛孔", "轮廓改善", "除眼底细纹", "眼部修复",
    "垫下巴", "颧骨内推", "除法令纹", "缩咬肌", "瘦臀部", "丰眼窝", "瘦手臂",
    "丰唇", "瘦腰腹", "瘦大腿", "填充泪沟", "洁面", "除面部细纹", "上眼睑提升",
    "大眼睛", "缩短眼距离", "除印第安纹", "瘦全身", "下巴改善", "祛双下巴",
    "祛痘印", "眼部护理", "缩窄下巴", "缩短下巴", "填充苹果肌", "祛痘坑",
    "填充卧蚕"
]
def get_es_diary(keywords, city_tag_id=-1, version=False):
    """Query the "diary" ES index for cold-start diaries matching keywords.

    Args:
        keywords: tag strings matched against the demand/solution/
            position/tags_v3 fields; any single match qualifies a doc
            (minimum_should_match = 1).
        city_tag_id: city tag id passed to the ES sort script; defaults
            to -1 (no specific city).
        version: when True the secondary sort uses "offline_score_v1",
            otherwise "offline_score".

    Returns:
        List of diary ids, bucketed and interleaved by get_result.
    """
    # The two version branches differed only in this one field name, so
    # the duplicated sort lists are collapsed into a single definition.
    score_field = "offline_score_v1" if version else "offline_score"
    sort_list = [
        {
            # Script-based primary sort: city-aware recommend score.
            "_script": {
                "lang": "groovy",
                "script_file": "sort_diary-recommend",
                "type": "number",
                "params": {
                    "user_city_tag_id": city_tag_id,
                },
                "order": "desc",
                "_cache": True,
            }
        },
        {"has_video_cover": {"order": "asc"}},
        {score_field: {"order": "desc"}},
        {"good_click": {"order": "desc"}},
        {"last_update_time": {"order": "desc"}},
    ]
    # One identical "terms" clause per taggable field.
    should_list = [
        {"terms": {field: keywords}}
        for field in (
            "first_demands", "second_demands", "first_solutions",
            "second_solutions", "positions", "second_positions", "tags_v3",
        )
    ]
    q = {
        "query": {
            "bool": {
                "filter": [
                    {"term": {"is_online": True}},
                    {"term": {"has_cover": True}},
                    {"term": {"is_sink": False}},
                    {"term": {"has_after_cover": True}},
                    {"term": {"has_before_cover": True}},
                    {"terms": {"content_level": [6, 5, 4, 3.5, 3]}},
                ],
                "should": should_list,
                "minimum_should_match": 1,
            }
        },
        "sort": sort_list,
        "_source": {"includes": ["id", "second_demands"]},
    }
    es_res = es_query("diary", q, offset=0, size=5000)
    return get_result(es_res["hits"]["hits"])
def get_es_tractate(keywords, version=False):
    """Query the "tractate" ES index for cold-start posts matching keywords.

    Args:
        keywords: tag strings matched against the demand/solution/
            position/tags_v3 fields; any single match qualifies a doc
            (minimum_should_match = 1).
        version: when True the secondary sort uses
            "good_click_tractate_score", otherwise "tractate_score".

    Returns:
        List of tractate ids, bucketed and interleaved by get_result.
    """
    # One identical "terms" clause per taggable field.
    should_list = [
        {"terms": {field: keywords}}
        for field in (
            "first_demands", "second_demands", "first_solutions",
            "second_solutions", "positions", "second_positions", "tags_v3",
        )
    ]
    # The two version branches differed only in this one field name, so
    # the duplicated sort lists are collapsed into a single definition.
    score_field = "good_click_tractate_score" if version else "tractate_score"
    q = {
        "query": {
            "bool": {
                "filter": [
                    {"term": {"is_online": True}},
                    {"terms": {"content_level": [6, 5, 4, 3.5, 3]}},
                ],
                "should": should_list,
                "minimum_should_match": 1,
            }
        },
        "_source": {"includes": ["id", "second_demands"]},
        "sort": [
            {"is_video": {"order": "asc"}},
            {score_field: {"order": "desc"}},
            {"good_click": {"order": "desc"}},
        ],
    }
    es_res = es_query("tractate", q, offset=0, size=5000)
    return get_result(es_res["hits"]["hits"])
def get_es_answer(keywords, version=False):
    """Query the "answer" ES index for cold-start answers matching keywords.

    Args:
        keywords: tag strings matched against the demand/solution/
            position/tags_v3 fields; any single match qualifies a doc
            (minimum_should_match = 1).
        version: when True the rank sort uses "smart_rank_v3",
            otherwise "smart_rank_v2".

    Returns:
        List of answer ids, bucketed and interleaved by get_result.
    """
    # The two version branches differed only in this one field name, so
    # the duplicated sort lists are collapsed into a single definition.
    rank_field = "smart_rank_v3" if version else "smart_rank_v2"
    sort_list = [
        {"has_picture": {"order": "desc"}},
        {rank_field: {"order": "desc"}},
        {"good_click": {"order": "desc"}},
    ]
    # One identical "terms" clause per taggable field.
    should_list = [
        {"terms": {field: keywords}}
        for field in (
            "first_demands", "second_demands", "first_solutions",
            "second_solutions", "positions", "second_positions", "tags_v3",
        )
    ]
    q = {
        "query": {
            "bool": {
                "filter": [
                    # Skip very short answers.
                    {"range": {"content_length": {"gte": 30}}},
                    {"term": {"is_online": True}},
                    # NOTE(review): content_level values are strings here
                    # but numeric in the diary/tractate queries — presumably
                    # this index stores them as keywords; confirm against
                    # the mapping before normalizing.
                    {"terms": {"content_level": ["6", "5", "4", "3.5", "3"]}},
                ],
                "should": should_list,
                "minimum_should_match": 1,
            }
        },
        "_source": {"includes": ["id", "second_demands"]},
        "sort": sort_list,
    }
    es_res = es_query("answer", q, offset=0, size=5000)
    return get_result(es_res["hits"]["hits"])
if __name__ == "__main__":
    # Fetch cold-start content ids per content type and stage them in
    # Redis lists (presumably consumed by the recommender — confirm the
    # reader of the doris:tag_v3:coldstart:* keys).
    diary_list = get_es_diary(keywords)
    tractate_list = get_es_tractate(keywords)
    answer_list = get_es_answer(keywords)

    # SECURITY(review): credentials and host are hard-coded in the URL —
    # move to configuration / environment before this spreads further.
    redis_client = redis.StrictRedis.from_url("redis://:ReDis!GmTx*0aN9@172.16.40.173:6379")

    # Guard each push: redis-py raises on RPUSH with zero values, so an
    # empty result set must not crash the job.
    if diary_list:
        redis_client.rpush("doris:tag_v3:coldstart:diary", *diary_list)
    if tractate_list:
        redis_client.rpush("doris:tag_v3:coldstart:tractate", *tractate_list)
    if answer_list:
        redis_client.rpush("doris:tag_v3:coldstart:answer", *answer_list)

    print(f"diary: {len(diary_list)}")
    print(f"tractate: {len(tractate_list)}")
    print(f"answer: {len(answer_list)}")
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment