Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
ad8ef78a
Commit
ad8ef78a
authored
Oct 28, 2020
by
赵威
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
try offline
parent
a073d013
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
192 additions
and
163 deletions
+192
-163
tag3_update_user_portrait_offline.py
eda/smart_rank/tag3_update_user_portrait_offline.py
+77
-60
tool.py
eda/smart_rank/tool.py
+115
-103
No files found.
eda/smart_rank/tag3_update_user_portrait_offline.py
View file @
ad8ef78a
...
...
@@ -9,7 +9,7 @@ from pyspark import SparkConf
from
pyspark.sql
import
SparkSession
from
tool
import
(
get_doris_prod
,
get_redis_client
,
get_tag3_user_log
,
get_user_portrait_tag3_with_score
,
send_email
,
write_user_portrait
,
write_user_portrait_by_event
,
write_user_portrait_doris
)
write_user_portrait
)
# [{'激光': 1.949194898204873}, {'手术': 1.949194898204873}, {'手术': 1.949194898204873}, {'手术': 1.949194898204873, '植发际线': 7.1}]
...
...
@@ -130,59 +130,59 @@ def _get_all_by_projects(name_lst):
return
None
def
update_tag3_user_portrait_by_event
(
cl_id
):
user_df
=
get_tag3_user_log
(
cl_id
)
if
not
user_df
.
empty
:
user_df
[
"first_solutions"
]
=
list
(
zip
(
user_df
[
"first_solutions"
]
.
apply
(
lambda
x
:
x
.
split
(
","
)),
user_df
[
"tag_score"
]))
user_df
[
"second_solutions"
]
=
list
(
zip
(
user_df
[
"second_solutions"
]
.
apply
(
lambda
x
:
x
.
split
(
","
)),
user_df
[
"tag_score"
]))
user_df
[
"first_demands"
]
=
list
(
zip
(
user_df
[
"first_demands"
]
.
apply
(
lambda
x
:
x
.
split
(
","
)),
user_df
[
"tag_score"
]))
user_df
[
"second_demands"
]
=
list
(
zip
(
user_df
[
"second_demands"
]
.
apply
(
lambda
x
:
x
.
split
(
","
)),
user_df
[
"tag_score"
]))
user_df
[
"first_positions"
]
=
list
(
zip
(
user_df
[
"first_positions"
]
.
apply
(
lambda
x
:
x
.
split
(
","
)),
user_df
[
"tag_score"
]))
user_df
[
"second_positions"
]
=
list
(
zip
(
user_df
[
"second_positions"
]
.
apply
(
lambda
x
:
x
.
split
(
","
)),
user_df
[
"tag_score"
]))
user_df
[
"projects"
]
=
list
(
zip
(
user_df
[
"projects"
]
.
apply
(
lambda
x
:
x
.
split
(
","
)),
user_df
[
"tag_score"
]))
user_df
[
"first_solutions_dict"
]
=
user_df
[
"first_solutions"
]
.
apply
(
lambda
x
:
make_dict_from_pair
(
x
))
user_df
[
"second_solutions_dict"
]
=
user_df
[
"second_solutions"
]
.
apply
(
lambda
x
:
make_dict_from_pair
(
x
))
user_df
[
"first_demands_dict"
]
=
user_df
[
"first_demands"
]
.
apply
(
lambda
x
:
make_dict_from_pair
(
x
))
user_df
[
"second_demands_dict"
]
=
user_df
[
"second_demands"
]
.
apply
(
lambda
x
:
make_dict_from_pair
(
x
))
user_df
[
"first_positions_dict"
]
=
user_df
[
"first_positions"
]
.
apply
(
lambda
x
:
make_dict_from_pair
(
x
))
user_df
[
"second_positions_dict"
]
=
user_df
[
"second_positions"
]
.
apply
(
lambda
x
:
make_dict_from_pair
(
x
))
user_df
[
"projects_dict"
]
=
user_df
[
"projects"
]
.
apply
(
lambda
x
:
make_dict_from_pair
(
x
))
user_df
[
"first_solutions"
]
=
list
(
zip
(
user_df
[
"event_cn"
],
user_df
[
"first_solutions_dict"
]))
user_df
[
"second_solutions"
]
=
list
(
zip
(
user_df
[
"event_cn"
],
user_df
[
"second_solutions_dict"
]))
user_df
[
"first_demands"
]
=
list
(
zip
(
user_df
[
"event_cn"
],
user_df
[
"first_demands_dict"
]))
user_df
[
"second_demands"
]
=
list
(
zip
(
user_df
[
"event_cn"
],
user_df
[
"second_demands_dict"
]))
user_df
[
"first_positions"
]
=
list
(
zip
(
user_df
[
"event_cn"
],
user_df
[
"first_positions_dict"
]))
user_df
[
"second_positions"
]
=
list
(
zip
(
user_df
[
"event_cn"
],
user_df
[
"second_positions_dict"
]))
user_df
[
"projects"
]
=
list
(
zip
(
user_df
[
"event_cn"
],
user_df
[
"projects_dict"
]))
first_solutions_dict
=
merge_results_by_event
(
user_df
[
"first_solutions"
]
.
tolist
())
second_solutions_dict
=
merge_results_by_event
(
user_df
[
"second_solutions"
]
.
tolist
())
first_demands_dict
=
merge_results_by_event
(
user_df
[
"first_demands"
]
.
tolist
())
second_demands_dict
=
merge_results_by_event
(
user_df
[
"second_demands"
]
.
tolist
())
first_positions_dict
=
merge_results_by_event
(
user_df
[
"first_positions"
]
.
tolist
())
second_positions_dict
=
merge_results_by_event
(
user_df
[
"second_positions"
]
.
tolist
())
projects_dict
=
merge_results_by_event
(
user_df
[
"projects"
]
.
tolist
())
events
=
set
(
list
(
first_solutions_dict
.
keys
())
+
list
(
second_solutions_dict
.
keys
())
+
list
(
first_demands_dict
.
keys
())
+
list
(
second_demands_dict
.
keys
())
+
list
(
first_positions_dict
.
keys
())
+
list
(
second_positions_dict
.
keys
())
+
list
(
projects_dict
.
keys
()))
for
e
in
events
:
first_solutions
=
","
.
join
(
first_solutions_dict
.
get
(
e
,
[]))
second_solutions
=
","
.
join
(
second_solutions_dict
.
get
(
e
,
[]))
first_demands
=
","
.
join
(
first_demands_dict
.
get
(
e
,
[]))
second_demands
=
","
.
join
(
second_demands_dict
.
get
(
e
,
[]))
first_positions
=
","
.
join
(
first_positions_dict
.
get
(
e
,
[]))
second_positions
=
","
.
join
(
second_positions_dict
.
get
(
e
,
[]))
projects
=
","
.
join
(
projects_dict
.
get
(
e
,
[]))
write_user_portrait_by_event
(
cl_id
,
first_solutions
,
second_solutions
,
first_demands
,
second_demands
,
first_positions
,
second_positions
,
projects
,
e
)
return
cl_id
#
def update_tag3_user_portrait_by_event(cl_id):
#
user_df = get_tag3_user_log(cl_id)
#
if not user_df.empty:
#
user_df["first_solutions"] = list(zip(user_df["first_solutions"].apply(lambda x: x.split(",")), user_df["tag_score"]))
#
user_df["second_solutions"] = list(zip(user_df["second_solutions"].apply(lambda x: x.split(",")), user_df["tag_score"]))
#
user_df["first_demands"] = list(zip(user_df["first_demands"].apply(lambda x: x.split(",")), user_df["tag_score"]))
#
user_df["second_demands"] = list(zip(user_df["second_demands"].apply(lambda x: x.split(",")), user_df["tag_score"]))
#
user_df["first_positions"] = list(zip(user_df["first_positions"].apply(lambda x: x.split(",")), user_df["tag_score"]))
#
user_df["second_positions"] = list(zip(user_df["second_positions"].apply(lambda x: x.split(",")), user_df["tag_score"]))
#
user_df["projects"] = list(zip(user_df["projects"].apply(lambda x: x.split(",")), user_df["tag_score"]))
#
user_df["first_solutions_dict"] = user_df["first_solutions"].apply(lambda x: make_dict_from_pair(x))
#
user_df["second_solutions_dict"] = user_df["second_solutions"].apply(lambda x: make_dict_from_pair(x))
#
user_df["first_demands_dict"] = user_df["first_demands"].apply(lambda x: make_dict_from_pair(x))
#
user_df["second_demands_dict"] = user_df["second_demands"].apply(lambda x: make_dict_from_pair(x))
#
user_df["first_positions_dict"] = user_df["first_positions"].apply(lambda x: make_dict_from_pair(x))
#
user_df["second_positions_dict"] = user_df["second_positions"].apply(lambda x: make_dict_from_pair(x))
#
user_df["projects_dict"] = user_df["projects"].apply(lambda x: make_dict_from_pair(x))
#
user_df["first_solutions"] = list(zip(user_df["event_cn"], user_df["first_solutions_dict"]))
#
user_df["second_solutions"] = list(zip(user_df["event_cn"], user_df["second_solutions_dict"]))
#
user_df["first_demands"] = list(zip(user_df["event_cn"], user_df["first_demands_dict"]))
#
user_df["second_demands"] = list(zip(user_df["event_cn"], user_df["second_demands_dict"]))
#
user_df["first_positions"] = list(zip(user_df["event_cn"], user_df["first_positions_dict"]))
#
user_df["second_positions"] = list(zip(user_df["event_cn"], user_df["second_positions_dict"]))
#
user_df["projects"] = list(zip(user_df["event_cn"], user_df["projects_dict"]))
#
first_solutions_dict = merge_results_by_event(user_df["first_solutions"].tolist())
#
second_solutions_dict = merge_results_by_event(user_df["second_solutions"].tolist())
#
first_demands_dict = merge_results_by_event(user_df["first_demands"].tolist())
#
second_demands_dict = merge_results_by_event(user_df["second_demands"].tolist())
#
first_positions_dict = merge_results_by_event(user_df["first_positions"].tolist())
#
second_positions_dict = merge_results_by_event(user_df["second_positions"].tolist())
#
projects_dict = merge_results_by_event(user_df["projects"].tolist())
#
events = set(
#
list(first_solutions_dict.keys()) + list(second_solutions_dict.keys()) + list(first_demands_dict.keys()) +
#
list(second_demands_dict.keys()) + list(first_positions_dict.keys()) + list(second_positions_dict.keys()) +
#
list(projects_dict.keys()))
#
for e in events:
#
first_solutions = ",".join(first_solutions_dict.get(e, []))
#
second_solutions = ",".join(second_solutions_dict.get(e, []))
#
first_demands = ",".join(first_demands_dict.get(e, []))
#
second_demands = ",".join(second_demands_dict.get(e, []))
#
first_positions = ",".join(first_positions_dict.get(e, []))
#
second_positions = ",".join(second_positions_dict.get(e, []))
#
projects = ",".join(projects_dict.get(e, []))
#
write_user_portrait_by_event(cl_id, first_solutions, second_solutions, first_demands, second_demands, first_positions,
#
second_positions, projects, e)
#
return cl_id
def
update_tag3_user_portrait
(
cl_id
):
...
...
@@ -195,6 +195,7 @@ def update_tag3_user_portrait(cl_id):
user_df
[
"first_positions"
]
=
list
(
zip
(
user_df
[
"first_positions"
]
.
apply
(
lambda
x
:
x
.
split
(
","
)),
user_df
[
"tag_score"
]))
user_df
[
"second_positions"
]
=
list
(
zip
(
user_df
[
"second_positions"
]
.
apply
(
lambda
x
:
x
.
split
(
","
)),
user_df
[
"tag_score"
]))
user_df
[
"projects"
]
=
list
(
zip
(
user_df
[
"projects"
]
.
apply
(
lambda
x
:
x
.
split
(
","
)),
user_df
[
"tag_score"
]))
user_df
[
"business_tags"
]
=
list
(
zip
(
user_df
[
"business_tags"
]
.
apply
(
lambda
x
:
x
.
split
(
","
)),
user_df
[
"tag_score"
]))
user_df
[
"first_solutions_dict"
]
=
user_df
[
"first_solutions"
]
.
apply
(
lambda
x
:
make_dict_from_pair
(
x
))
user_df
[
"second_solutions_dict"
]
=
user_df
[
"second_solutions"
]
.
apply
(
lambda
x
:
make_dict_from_pair
(
x
))
...
...
@@ -203,6 +204,7 @@ def update_tag3_user_portrait(cl_id):
user_df
[
"first_positions_dict"
]
=
user_df
[
"first_positions"
]
.
apply
(
lambda
x
:
make_dict_from_pair
(
x
))
user_df
[
"second_positions_dict"
]
=
user_df
[
"second_positions"
]
.
apply
(
lambda
x
:
make_dict_from_pair
(
x
))
user_df
[
"projects_dict"
]
=
user_df
[
"projects"
]
.
apply
(
lambda
x
:
make_dict_from_pair
(
x
))
user_df
[
"business_tags_dict"
]
=
user_df
[
"business_tags"
]
.
apply
(
lambda
x
:
make_dict_from_pair
(
x
))
first_solutions_list
=
user_df
[
"first_solutions_dict"
]
.
tolist
()
second_solutions_list
=
user_df
[
"second_solutions_dict"
]
.
tolist
()
...
...
@@ -211,6 +213,7 @@ def update_tag3_user_portrait(cl_id):
first_positions_list
=
user_df
[
"first_positions_dict"
]
.
tolist
()
second_positions_list
=
user_df
[
"second_positions_dict"
]
.
tolist
()
projects_list
=
user_df
[
"projects_dict"
]
.
tolist
()
business_tags_list
=
user_df
[
"business_tags_dict"
]
.
tolist
()
first_demands_score
=
merge_values
(
first_demands_list
)
second_demands_score
=
merge_values
(
second_demands_list
)
...
...
@@ -220,6 +223,7 @@ def update_tag3_user_portrait(cl_id):
second_positions_score
=
merge_values
(
second_positions_list
)
# projects_score = merge_values(projects_list)
projects_score_tmp
=
merge_values
(
projects_list
)
business_tags_score
=
merge_values
(
business_tags_list
)
tmp
=
[
"不感兴趣"
,
"没有想法"
,
"八卦来了"
,
"颜值速报"
,
"医美审美干货"
,
"其他项目"
,
"网红颜值"
,
"少年之名"
,
"郑人予"
,
"热点课代表"
,
"私密"
,
"陈瑞泽"
,
"符仁杰"
,
"祖力亚尔"
,
"刘泽旭"
,
...
...
@@ -248,7 +252,8 @@ def update_tag3_user_portrait(cl_id):
"second_solutions"
:
second_solutions_score
,
"first_positions"
:
first_positions_score
,
"second_positions"
:
second_positions_score
,
"projects"
:
projects_score
"projects"
:
projects_score
,
"business_tags"
:
business_tags_score
}
if
tmp_res
:
res
.
update
(
tmp_res
)
...
...
@@ -262,14 +267,21 @@ def update_tag3_user_portrait(cl_id):
if
(
len
(
first_demands_score
.
keys
())
>
0
)
or
(
len
(
second_demands_score
.
keys
())
>
0
)
or
\
(
len
(
first_solutions_score
.
keys
())
>
0
)
or
(
len
(
second_solutions_score
.
keys
())
>
0
)
or
\
(
len
(
first_positions_score
.
keys
())
>
0
)
or
(
len
(
second_positions_score
.
keys
())
>
0
)
or
\
(
len
(
projects_score
.
keys
())
>
0
):
(
len
(
projects_score
.
keys
())
>
0
)
or
(
len
(
business_tags_score
.
keys
())
>
0
)
:
redis_client
.
set
(
key
,
json
.
dumps
(
res
))
redis_client
.
expire
(
key
,
60
*
60
*
24
*
180
)
write_user_portrait
(
cl_id
,
","
.
join
(
first_solutions_score
.
keys
()),
","
.
join
(
second_solutions_score
.
keys
()),
","
.
join
(
first_demands_score
.
keys
()),
","
.
join
(
second_demands_score
.
keys
()),
","
.
join
(
first_positions_score
.
keys
()),
","
.
join
(
second_positions_score
.
keys
()),
","
.
join
(
projects_score
.
keys
()))
write_user_portrait
(
cl_id
,
","
.
join
(
first_solutions_score
.
keys
()),
","
.
join
(
second_solutions_score
.
keys
()),
","
.
join
(
first_demands_score
.
keys
()),
","
.
join
(
second_demands_score
.
keys
()),
","
.
join
(
first_positions_score
.
keys
()),
","
.
join
(
second_positions_score
.
keys
()),
","
.
join
(
projects_score
.
keys
()),
","
.
join
(
business_tags_score
.
keys
()),
)
# body = {}
# for (k, v) in res.items():
...
...
@@ -333,6 +345,11 @@ if __name__ == "__main__":
# update_tag3_user_portrait("862460044588666")
# update_tag3_user_portrait("androidid_a25a1129c0b38f7b")
# cl_id = "864350041167473"
# df = get_tag3_user_log(cl_id)
# df[["projects", "business_tags"]]
# update_tag3_user_portrait(cl_id)
consume_kafka
()
end
=
datetime
.
datetime
.
now
()
print
(
end
-
start
)
...
...
eda/smart_rank/tool.py
View file @
ad8ef78a
# coding: utf-8
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
pymysql
from
__future__
import
absolute_import
,
division
,
print_function
import
datetime
import
json
import
smtplib
import
time
import
traceback
from
email.mime.application
import
MIMEApplication
from
email.mime.multipart
import
MIMEMultipart
from
email.mime.text
import
MIMEText
from
email.utils
import
formataddr
from
email.mime.multipart
import
MIMEMultipart
from
email.mime.application
import
MIMEApplication
import
redis
import
datetime
import
time
import
json
import
numpy
as
np
import
pandas
as
pd
import
traceback
import
pymysql
import
redis
def
send_email
(
app
,
id
,
e
):
def
send_email
(
app
,
id
,
e
):
# 第三方 SMTP 服务
mail_host
=
'smtp.exmail.qq.com'
# 设置服务器
mail_host
=
"smtp.exmail.qq.com"
# 设置服务器
mail_user
=
"zhaowei@igengmei.com"
# 用户名
mail_pass
=
"Gengmei1234"
# 口令
sender
=
'zhaowei@igengmei.com'
receivers
=
[
'zhaowei@igengmei.com'
]
# 接收邮件,可设置为你的QQ邮箱或者其他邮箱
sender
=
"zhaowei@igengmei.com"
receivers
=
[
"zhaowei@igengmei.com"
]
# 接收邮件,可设置为你的QQ邮箱或者其他邮箱
e
=
str
(
e
)
msg
=
MIMEMultipart
()
part
=
MIMEText
(
'app_id:'
+
id
+
':fail'
+
"
\n
"
+
e
,
'plain'
,
'utf-8'
)
part
=
MIMEText
(
"app_id:"
+
id
+
":fail"
+
"
\n
"
+
e
,
"plain"
,
"utf-8"
)
msg
.
attach
(
part
)
msg
[
'From'
]
=
formataddr
([
"gaoyazhe"
,
sender
])
msg
[
"From"
]
=
formataddr
([
"gaoyazhe"
,
sender
])
# 括号里的对应收件人邮箱昵称、收件人邮箱账号
msg
[
'To'
]
=
";"
.
join
(
receivers
)
msg
[
"To"
]
=
";"
.
join
(
receivers
)
# message['Cc'] = ";".join(cc_reciver)
msg
[
'Subject'
]
=
'spark streaming:app_name:'
+
app
msg
[
"Subject"
]
=
"spark streaming:app_name:"
+
app
try
:
with
open
(
'error.txt'
,
'w'
)
as
f
:
with
open
(
"error.txt"
,
"w"
)
as
f
:
f
.
write
(
e
)
f
.
close
()
part
=
MIMEApplication
(
open
(
'error.txt'
,
'r'
)
.
read
())
part
.
add_header
(
'Content-Disposition'
,
'attachment'
,
filename
=
"error.txt"
)
part
=
MIMEApplication
(
open
(
"error.txt"
,
"r"
)
.
read
())
part
.
add_header
(
"Content-Disposition"
,
"attachment"
,
filename
=
"error.txt"
)
msg
.
attach
(
part
)
except
Exception
as
e
:
print
(
e
)
...
...
@@ -51,7 +51,7 @@ def send_email(app,id,e):
smtpObj
.
login
(
mail_user
,
mail_pass
)
smtpObj
.
sendmail
(
sender
,
receivers
,
msg
.
as_string
())
except
smtplib
.
SMTPException
:
print
(
'error'
)
print
(
"error"
)
def
get_data_by_mysql
(
host
,
port
,
user
,
passwd
,
db
,
sql
):
...
...
@@ -90,13 +90,13 @@ def get_all_search_word_synonym_tags():
"left join api_wordrelsynonym b on a.id = b.wordrel_id "
\
"left join api_tag c on b.word=c.name "
\
"where c.tag_type+0<'4'+0 and c.is_online=1"
mysql_results
=
get_data_by_mysql
(
'172.16.30.141'
,
3306
,
'zx_str'
,
'ZXueX58pStrage'
,
'zhengxing'
,
sql
)
mysql_results
=
get_data_by_mysql
(
"172.16.30.141"
,
3306
,
"zx_str"
,
"ZXueX58pStrage"
,
"zhengxing"
,
sql
)
result_dict
=
dict
()
for
data
in
mysql_results
:
if
data
[
'keyword'
]
not
in
result_dict
:
result_dict
[
data
[
'keyword'
]]
=
[
data
[
'id'
]]
if
data
[
"keyword"
]
not
in
result_dict
:
result_dict
[
data
[
"keyword"
]]
=
[
data
[
"id"
]]
else
:
result_dict
[
data
[
'keyword'
]]
.
append
(
data
[
'id'
])
result_dict
[
data
[
"keyword"
]]
.
append
(
data
[
"id"
])
return
result_dict
except
Exception
as
e
:
print
(
e
)
...
...
@@ -106,13 +106,13 @@ def get_all_word_synonym_words():
try
:
sql
=
"select a.keyword, b.word from api_wordrel a "
\
"left join api_wordrelsynonym b on a.id = b.wordrel_id "
mysql_results
=
get_data_by_mysql
(
'172.16.30.141'
,
3306
,
'zx_str'
,
'ZXueX58pStrage'
,
'zhengxing'
,
sql
)
mysql_results
=
get_data_by_mysql
(
"172.16.30.141"
,
3306
,
"zx_str"
,
"ZXueX58pStrage"
,
"zhengxing"
,
sql
)
result_dict
=
dict
()
for
data
in
mysql_results
:
if
data
[
'keyword'
]
not
in
result_dict
:
result_dict
[
data
[
'keyword'
]]
=
[
data
[
'word'
]]
if
data
[
"keyword"
]
not
in
result_dict
:
result_dict
[
data
[
"keyword"
]]
=
[
data
[
"word"
]]
else
:
result_dict
[
data
[
'keyword'
]]
.
append
(
data
[
'word'
])
result_dict
[
data
[
"keyword"
]]
.
append
(
data
[
"word"
])
return
result_dict
except
Exception
as
e
:
print
(
e
)
...
...
@@ -125,13 +125,13 @@ def get_all_synonym_tags():
try
:
sql
=
"select a.word, b.id from api_wordrelsynonym a left join api_tag b "
\
"on a.word=b.name where b.tag_type+0<'4'+0 and b.is_online=1"
mysql_results
=
get_data_by_mysql
(
'172.16.30.141'
,
3306
,
'zx_str'
,
'ZXueX58pStrage'
,
'zhengxing'
,
sql
)
mysql_results
=
get_data_by_mysql
(
"172.16.30.141"
,
3306
,
"zx_str"
,
"ZXueX58pStrage"
,
"zhengxing"
,
sql
)
result_dict
=
dict
()
for
data
in
mysql_results
:
if
data
[
'word'
]
not
in
result_dict
:
result_dict
[
data
[
'word'
]]
=
[
data
[
'id'
]]
if
data
[
"word"
]
not
in
result_dict
:
result_dict
[
data
[
"word"
]]
=
[
data
[
"id"
]]
else
:
result_dict
[
data
[
'word'
]]
.
append
(
data
[
'id'
])
result_dict
[
data
[
"word"
]]
.
append
(
data
[
"id"
])
return
result_dict
except
Exception
as
e
:
print
(
e
)
...
...
@@ -143,13 +143,13 @@ def get_all_api_tags():
"""
try
:
sql
=
"select name, id from api_tag where tag_type in ('1', '2', '3', '5') and is_online=1"
mysql_results
=
get_data_by_mysql
(
'172.16.30.141'
,
3306
,
'zx_str'
,
'ZXueX58pStrage'
,
'zhengxing'
,
sql
)
mysql_results
=
get_data_by_mysql
(
"172.16.30.141"
,
3306
,
"zx_str"
,
"ZXueX58pStrage"
,
"zhengxing"
,
sql
)
result_dict
=
dict
()
for
data
in
mysql_results
:
if
data
[
'name'
]
not
in
result_dict
:
result_dict
[
data
[
'name'
]]
=
[
data
[
'id'
]]
if
data
[
"name"
]
not
in
result_dict
:
result_dict
[
data
[
"name"
]]
=
[
data
[
"id"
]]
else
:
result_dict
[
data
[
'name'
]]
.
append
(
data
[
'id'
])
result_dict
[
data
[
"name"
]]
.
append
(
data
[
"id"
])
return
result_dict
except
Exception
as
e
:
print
(
e
)
...
...
@@ -171,10 +171,10 @@ def get_all_tag_tag_type():
"""
try
:
sql
=
"select id,tag_type from api_tag where tag_type+0<'4'+0 and is_online=1"
mysql_results
=
get_data_by_mysql
(
'172.16.30.141'
,
3306
,
'zx_str'
,
'ZXueX58pStrage'
,
'zhengxing'
,
sql
)
mysql_results
=
get_data_by_mysql
(
"172.16.30.141"
,
3306
,
"zx_str"
,
"ZXueX58pStrage"
,
"zhengxing"
,
sql
)
result_dict
=
dict
()
for
data
in
mysql_results
:
result_dict
[
data
[
'id'
]]
=
data
[
'tag_type'
]
result_dict
[
data
[
"id"
]]
=
data
[
"tag_type"
]
return
result_dict
except
Exception
as
e
:
print
(
e
)
...
...
@@ -186,13 +186,13 @@ def get_all_3tag_2tag():
" left join api_tag b on a.parent_id=b.id "
\
"where a.child_id in (select id from api_tag where tag_type='3' and is_online=1) "
\
"and b.tag_type='2' and b.is_online=1"
mysql_results
=
get_data_by_mysql
(
'172.16.30.141'
,
3306
,
'zx_str'
,
'ZXueX58pStrage'
,
'zhengxing'
,
sql
)
mysql_results
=
get_data_by_mysql
(
"172.16.30.141"
,
3306
,
"zx_str"
,
"ZXueX58pStrage"
,
"zhengxing"
,
sql
)
result_dict
=
dict
()
for
data
in
mysql_results
:
if
data
[
'child_id'
]
not
in
result_dict
:
result_dict
[
data
[
'child_id'
]]
=
[
data
[
'parent_id'
]]
if
data
[
"child_id"
]
not
in
result_dict
:
result_dict
[
data
[
"child_id"
]]
=
[
data
[
"parent_id"
]]
else
:
result_dict
[
data
[
'child_id'
]]
.
append
(
data
[
'parent_id'
])
result_dict
[
data
[
"child_id"
]]
.
append
(
data
[
"parent_id"
])
return
result_dict
except
Exception
as
e
:
print
(
e
)
...
...
@@ -204,13 +204,13 @@ def get_all_tag_parent_tag():
" left join api_tag b on a.parent_id=b.id "
\
"where a.child_id in (select id from api_tag where tag_type+0<'4'+0 and is_online=1) "
\
"and b.tag_type+0<'4'+0 and b.is_online=1"
mysql_results
=
get_data_by_mysql
(
'172.16.30.141'
,
3306
,
'zx_str'
,
'ZXueX58pStrage'
,
'zhengxing'
,
sql
)
mysql_results
=
get_data_by_mysql
(
"172.16.30.141"
,
3306
,
"zx_str"
,
"ZXueX58pStrage"
,
"zhengxing"
,
sql
)
result_dict
=
dict
()
for
data
in
mysql_results
:
if
data
[
'child_id'
]
not
in
result_dict
:
result_dict
[
data
[
'child_id'
]]
=
[
data
[
'parent_id'
]]
if
data
[
"child_id"
]
not
in
result_dict
:
result_dict
[
data
[
"child_id"
]]
=
[
data
[
"parent_id"
]]
else
:
result_dict
[
data
[
'child_id'
]]
.
append
(
data
[
'parent_id'
])
result_dict
[
data
[
"child_id"
]]
.
append
(
data
[
"parent_id"
])
return
result_dict
except
Exception
as
e
:
print
(
e
)
...
...
@@ -219,10 +219,10 @@ def get_all_tag_parent_tag():
def
get_all_tags_name
():
try
:
sql
=
"select id, name from api_tag where tag_type+0<'4'+0 and is_online=1"
mysql_results
=
get_data_by_mysql
(
'172.16.30.141'
,
3306
,
'zx_str'
,
'ZXueX58pStrage'
,
'zhengxing'
,
sql
)
mysql_results
=
get_data_by_mysql
(
"172.16.30.141"
,
3306
,
"zx_str"
,
"ZXueX58pStrage"
,
"zhengxing"
,
sql
)
result_dict
=
dict
()
for
data
in
mysql_results
:
result_dict
[
data
[
'id'
]]
=
data
[
'name'
]
result_dict
[
data
[
"id"
]]
=
data
[
"name"
]
return
result_dict
except
Exception
as
e
:
print
(
e
)
...
...
@@ -244,57 +244,67 @@ def get_tag2_from_tag3(tag3, all_3tag_2tag, user_log_df_tag2_list):
def
compute_henqiang
(
x
,
decay_days
=
30
,
exponential
=
0
,
action_tag_count
=
1
):
if
exponential
:
alpha
=
exponential_decay
(
x
,
decay_days
)
score
=
15
/
action_tag_count
-
1.1
**
alpha
*
((
15
-
0.5
)
/
decay_days
)
score
=
15
/
action_tag_count
-
1.1
**
alpha
*
((
15
-
0.5
)
/
decay_days
)
else
:
score
=
15
/
action_tag_count
-
x
*
((
15
-
0.5
)
/
decay_days
)
score
=
15
/
action_tag_count
-
x
*
((
15
-
0.5
)
/
decay_days
)
if
score
>
0.5
:
return
score
else
:
return
0.5
def
compute_jiaoqiang
(
x
,
decay_days
=
30
,
exponential
=
0
,
action_tag_count
=
1
):
if
exponential
:
alpha
=
exponential_decay
(
x
,
decay_days
)
score
=
12
/
action_tag_count
-
1.1
**
alpha
*
((
12
-
0.5
)
/
decay_days
)
score
=
12
/
action_tag_count
-
1.1
**
alpha
*
((
12
-
0.5
)
/
decay_days
)
else
:
score
=
12
/
action_tag_count
-
x
*
((
12
-
0.5
)
/
decay_days
)
score
=
12
/
action_tag_count
-
x
*
((
12
-
0.5
)
/
decay_days
)
if
score
>
0.5
:
return
score
else
:
return
0.5
def
compute_ruoyixiang
(
x
,
decay_days
=
30
,
exponential
=
0
,
action_tag_count
=
1
):
if
exponential
:
alpha
=
exponential_decay
(
x
,
decay_days
)
score
=
5
/
action_tag_count
-
1.1
**
alpha
*
((
5
-
0.5
)
/
decay_days
)
score
=
5
/
action_tag_count
-
1.1
**
alpha
*
((
5
-
0.5
)
/
decay_days
)
else
:
score
=
5
/
action_tag_count
-
x
*
((
5
-
0.5
)
/
decay_days
)
score
=
5
/
action_tag_count
-
x
*
((
5
-
0.5
)
/
decay_days
)
if
score
>
0.5
:
return
score
else
:
return
0.5
def
compute_validate
(
x
,
decay_days
=
30
,
exponential
=
0
,
action_tag_count
=
1
):
if
exponential
:
alpha
=
exponential_decay
(
x
,
decay_days
)
score
=
10
/
action_tag_count
-
1.1
**
alpha
*
((
10
-
0.5
)
/
decay_days
)
score
=
10
/
action_tag_count
-
1.1
**
alpha
*
((
10
-
0.5
)
/
decay_days
)
else
:
score
=
10
/
action_tag_count
-
x
*
((
10
-
0.5
)
/
decay_days
)
score
=
10
/
action_tag_count
-
x
*
((
10
-
0.5
)
/
decay_days
)
if
score
>
0.5
:
return
score
else
:
return
0.5
def
compute_ai_scan
(
x
,
decay_days
=
30
,
exponential
=
0
,
action_tag_count
=
1
):
if
exponential
:
alpha
=
exponential_decay
(
x
,
decay_days
)
score
=
2
/
action_tag_count
-
1.1
**
alpha
*
((
2
-
0.5
)
/
decay_days
)
score
=
2
/
action_tag_count
-
1.1
**
alpha
*
((
2
-
0.5
)
/
decay_days
)
else
:
score
=
2
/
action_tag_count
-
x
*
((
2
-
0.5
)
/
decay_days
)
score
=
2
/
action_tag_count
-
x
*
((
2
-
0.5
)
/
decay_days
)
if
score
>
0.5
:
return
score
else
:
return
0.5
def
get_action_tag_count
(
df
,
action_time
):
try
:
if
not
df
[
df
[
'time'
]
==
action_time
]
.
empty
:
return
len
(
df
[
df
[
'time'
]
==
action_time
])
if
not
df
[
df
[
"time"
]
==
action_time
]
.
empty
:
return
len
(
df
[
df
[
"time"
]
==
action_time
])
else
:
return
1
except
Exception
as
e
:
...
...
@@ -303,7 +313,7 @@ def get_action_tag_count(df, action_time):
def
exponential_decay
(
days_diff
,
decay_days
=
30
):
# 天数差归一化到[0, decay_days]
x
=
np
.
arange
(
1
,
180
+
1
,
1
)
x
=
np
.
arange
(
1
,
180
+
1
,
1
)
a
=
(
decay_days
-
0
)
*
(
days_diff
-
min
(
x
))
/
(
max
(
x
)
-
min
(
x
))
return
a
...
...
@@ -315,8 +325,12 @@ def args_test(x):
def
get_user_log
(
cl_id
,
all_word_tags
,
pay_time
=
0
,
debug
=
0
):
user_df_service
=
pd
.
DataFrame
(
columns
=
[
"time"
,
"cl_id"
,
"score_type"
,
"tag_id"
,
"tag_referrer"
,
"action"
])
try
:
db_jerry_test
=
pymysql
.
connect
(
host
=
'172.16.40.158'
,
port
=
4000
,
user
=
'st_user'
,
passwd
=
'aqpuBLYzEV7tML5RPsN1pntUzFy'
,
db
=
'jerry_test'
,
charset
=
'utf8'
)
db_jerry_test
=
pymysql
.
connect
(
host
=
"172.16.40.158"
,
port
=
4000
,
user
=
"st_user"
,
passwd
=
"aqpuBLYzEV7tML5RPsN1pntUzFy"
,
db
=
"jerry_test"
,
charset
=
"utf8"
)
cur_jerry_test
=
db_jerry_test
.
cursor
()
if
pay_time
==
0
:
user_df_service_sql
=
"select time,cl_id,score_type,tag_id,tag_referrer,action from user_new_tag_log "
\
...
...
@@ -335,19 +349,19 @@ def get_user_log(cl_id, all_word_tags, pay_time=0, debug=0):
user_df_search
=
user_df_service
[
user_df_service
[
"action"
]
==
"do_search"
]
if
debug
:
# 用户的非搜索、支付行为
user_df_service
=
user_df_service
.
loc
[
~
user_df_service
[
"action"
]
.
isin
([
"do_search"
,
"api/settlement/alipay_callback"
]
)]
user_df_service
=
user_df_service
.
loc
[
~
user_df_service
[
"action"
]
.
isin
([
"do_search"
,
"api/settlement/alipay_callback"
]
)]
else
:
# 用户的非搜索行为
user_df_service
=
user_df_service
.
loc
[
~
user_df_service
[
"action"
]
.
isin
([
"do_search"
])]
# 搜索词转成tag,合并用户日志
user_df_search_dict
=
dict
()
for
index
,
row
in
user_df_search
.
iterrows
():
if
row
[
'tag_referrer'
]
in
all_word_tags
:
word_tag_list
=
all_word_tags
[
row
[
'tag_referrer'
]]
row
[
'tag_id'
]
=
int
(
word_tag_list
[
0
])
if
word_tag_list
else
-
1
if
row
[
"tag_referrer"
]
in
all_word_tags
:
word_tag_list
=
all_word_tags
[
row
[
"tag_referrer"
]]
row
[
"tag_id"
]
=
int
(
word_tag_list
[
0
])
if
word_tag_list
else
-
1
else
:
row
[
'tag_id'
]
=
-
1
row
[
"tag_id"
]
=
-
1
user_df_service
=
user_df_service
.
append
(
user_df_search
)
return
user_df_service
[
user_df_service
[
"tag_id"
]
!=
-
1
]
except
:
...
...
@@ -366,12 +380,7 @@ def get_jerry_test():
def
get_doris_prod
():
db
=
pymysql
.
connect
(
host
=
"172.16.30.136"
,
port
=
3306
,
user
=
"doris"
,
passwd
=
"o5gbA27hXHHm"
,
db
=
"doris_prod"
,
charset
=
"utf8"
)
db
=
pymysql
.
connect
(
host
=
"172.16.30.136"
,
port
=
3306
,
user
=
"doris"
,
passwd
=
"o5gbA27hXHHm"
,
db
=
"doris_prod"
,
charset
=
"utf8"
)
return
db
,
db
.
cursor
()
...
...
@@ -391,11 +400,11 @@ def compute_tag3_score(x):
def
get_tag3_user_log
(
cl_id
):
columns
=
[
"log_time"
,
"score_type"
,
"event_cn"
,
"first_solutions"
,
"second_solutions"
,
"first_demands"
,
"second_demands"
,
"first_positions"
,
"second_positions"
,
"projects"
"first_positions"
,
"second_positions"
,
"projects"
,
"business_tags"
]
try
:
sql
=
"""select log_time, score_type, event_cn, first_solutions, second_solutions, first_demands,
second_demands, first_positions, second_positions, projects
second_demands, first_positions, second_positions, projects
, business_tags
from kafka_tag3_log where cl_id = '{}'"""
.
format
(
cl_id
)
db
,
cursor
=
get_doris_prod
()
...
...
@@ -412,6 +421,7 @@ def get_tag3_user_log(cl_id):
user_df
[
"days_diff_now"
]
=
round
((
int
(
time
.
time
())
-
user_df
[
"log_time"
]
.
astype
(
float
))
/
(
24
*
60
*
60
))
user_df
[
"tag_score"
]
=
user_df
.
apply
(
lambda
x
:
compute_tag3_score
(
x
),
axis
=
1
)
user_df
[
"business_tags"
]
=
user_df
[
"business_tags"
]
.
fillna
(
""
)
return
user_df
except
Exception
as
e
:
print
(
e
)
...
...
@@ -431,15 +441,16 @@ def get_tag3_user_log(cl_id):
# `projects` text NOT NULL,
# PRIMARY KEY(`id`)
# )
def
write_user_portrait
(
cl_id
,
first_solutions
,
second_solutions
,
first_demands
,
second_demands
,
first_positions
,
second_positions
,
projects
):
# ALTER TABLE `user_tag3_portrait` ADD COLUMN business_tags text COMMENT '商业标签';
def
write_user_portrait
(
cl_id
,
first_solutions
,
second_solutions
,
first_demands
,
second_demands
,
first_positions
,
second_positions
,
projects
,
business_tags
):
try
:
today
=
datetime
.
date
.
today
()
oneday
=
datetime
.
timedelta
(
days
=
1
)
yesterday
=
today
-
oneday
sql
=
"""insert into user_tag3_portrait values(null, '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}')"""
.
format
(
yesterday
,
cl_id
,
first_solutions
,
second_solutions
,
first_demands
,
second_demands
,
first_positions
,
second_positions
,
project
s
)
sql
=
"""insert into user_tag3_portrait values(null, '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}'
, '{}'
)"""
.
format
(
yesterday
,
cl_id
,
first_solutions
,
second_solutions
,
first_demands
,
second_demands
,
first_positions
,
second_positions
,
projects
,
business_tag
s
)
db
,
cursor
=
get_jerry_test
()
cursor
.
execute
(
sql
)
...
...
@@ -451,24 +462,24 @@ def write_user_portrait(cl_id, first_solutions, second_solutions, first_demands,
print
(
e
)
def
write_user_portrait_doris
(
cl_id
,
first_solutions
,
second_solutions
,
first_demands
,
second_demands
,
first_positions
,
second_positions
,
projects
):
try
:
today
=
datetime
.
date
.
today
()
oneday
=
datetime
.
timedelta
(
days
=
1
)
yesterday
=
today
-
oneday
sql
=
"""insert into user_tag3_portrait values(null, '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}')"""
.
format
(
yesterday
,
cl_id
,
first_solutions
,
second_solutions
,
first_demands
,
second_demands
,
first_positions
,
second_positions
,
projects
)
#
def write_user_portrait_doris(cl_id, first_solutions, second_solutions, first_demands, second_demands, first_positions,
#
second_positions, projects):
#
try:
#
today = datetime.date.today()
#
oneday = datetime.timedelta(days=1)
#
yesterday = today - oneday
#
sql = """insert into user_tag3_portrait values(null, '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}')""".format(
#
yesterday, cl_id, first_solutions, second_solutions, first_demands, second_demands, first_positions, second_positions,
#
projects)
db
,
cursor
=
get_doris_prod
()
cursor
.
execute
(
sql
)
db
.
commit
()
db
.
close
()
cursor
.
close
()
except
Exception
as
e
:
print
(
"write db error"
)
print
(
e
)
#
db, cursor = get_doris_prod()
#
cursor.execute(sql)
#
db.commit()
#
db.close()
#
cursor.close()
#
except Exception as e:
#
print("write db error")
#
print(e)
# CREATE TABLE `user_tag3_event_portrait` (
...
...
@@ -506,7 +517,7 @@ def write_user_portrait_by_event(cl_id, first_solutions, second_solutions, first
def
get_redis_client
():
return
redis
.
StrictRedis
.
from_url
(
'redis://:ReDis!GmTx*0aN9@172.16.40.173:6379'
)
return
redis
.
StrictRedis
.
from_url
(
"redis://:ReDis!GmTx*0aN9@172.16.40.173:6379"
)
def
get_user_portrait_tag3_from_redis
(
device_id
,
limit_score
=
0
):
...
...
@@ -514,6 +525,7 @@ def get_user_portrait_tag3_from_redis(device_id, limit_score=0):
new_d
=
dict
(
sorted
(
d
.
items
(),
key
=
lambda
x
:
x
[
1
],
reverse
=
True
))
res
=
{
tag
:
float
(
score
)
for
tag
,
score
in
new_d
.
items
()
if
float
(
score
)
>=
limit_score
}
return
list
(
res
.
keys
())
portrait_key
=
"doris:user_portrait:tag3:device_id:"
+
str
(
device_id
)
redis_client
=
get_redis_client
()
if
redis_client
.
exists
(
portrait_key
):
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment