Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
c58ac417
Commit
c58ac417
authored
Sep 04, 2019
by
高雅喆
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
update
parent
970f1871
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
16 additions
and
27 deletions
+16
-27
dist_update_portrait_market.py
eda/smart_rank/dist_update_portrait_market.py
+16
-27
No files found.
eda/smart_rank/dist_update_portrait_market.py
View file @
c58ac417
...
@@ -146,14 +146,25 @@ def compute_ai_scan(x):
...
@@ -146,14 +146,25 @@ def compute_ai_scan(x):
return
0.5
return
0.5
def
get_user_tag_score
(
cl_id
,
all_
log_df
,
all_
word_tags
,
size
=
10
):
def
get_user_tag_score
(
cl_id
,
all_word_tags
,
size
=
10
):
try
:
try
:
db_jerry_test
=
pymysql
.
connect
(
host
=
'172.16.40.158'
,
port
=
4000
,
user
=
'root'
,
passwd
=
'3SYz54LS9#^9sBvC'
,
db_jerry_test
=
pymysql
.
connect
(
host
=
'172.16.40.158'
,
port
=
4000
,
user
=
'root'
,
passwd
=
'3SYz54LS9#^9sBvC'
,
db
=
'jerry_test'
,
charset
=
'utf8'
)
db
=
'jerry_test'
,
charset
=
'utf8'
)
cur_jerry_test
=
db_jerry_test
.
cursor
()
cur_jerry_test
=
db_jerry_test
.
cursor
()
user_log_df
=
all_log_df
.
loc
[(
all_log_df
[
'cl_id'
]
==
cl_id
)
&
(
all_log_df
[
'action'
]
!=
'do_search'
)]
# 用户的非搜索行为
user_df_search
=
all_log_df
.
loc
[(
all_log_df
[
'cl_id'
]
==
cl_id
)
&
(
all_log_df
[
'action'
]
==
'do_search'
)]
user_df_service_sql
=
"select time,cl_id,score_type,tag_id,tag_referrer,action from user_new_tag_log "
\
"where cl_id ='{}' and action != 'do_search' "
.
format
(
cl_id
)
cur_jerry_test
.
execute
(
user_df_service_sql
)
user_log_df
=
pd
.
DataFrame
(
list
(
cur_jerry_test
.
fetchall
()))
user_log_df
.
columns
=
[
"time"
,
"cl_id"
,
"score_type"
,
"tag_id"
,
"tag_referrer"
,
"action"
]
# 用户的搜索行为
user_df_search_sql
=
"select time,cl_id,score_type,tag_id,tag_referrer,action from user_new_tag_log "
\
"where cl_id ='{}' and action = 'do_search'"
.
format
(
cl_id
)
cur_jerry_test
.
execute
(
user_df_search_sql
)
user_df_search
=
pd
.
DataFrame
(
list
(
cur_jerry_test
.
fetchall
()))
user_df_search
.
columns
=
[
"time"
,
"cl_id"
,
"score_type"
,
"tag_id"
,
"tag_referrer"
,
"action"
]
# 搜索词转成tag
# 搜索词转成tag
for
index
,
row
in
user_df_search
.
iterrows
():
for
index
,
row
in
user_df_search
.
iterrows
():
if
row
[
'tag_referrer'
]
in
all_word_tags
:
if
row
[
'tag_referrer'
]
in
all_word_tags
:
...
@@ -201,26 +212,13 @@ if __name__ == '__main__':
...
@@ -201,26 +212,13 @@ if __name__ == '__main__':
db
=
'jerry_test'
,
charset
=
'utf8'
)
db
=
'jerry_test'
,
charset
=
'utf8'
)
cur_jerry_test
=
db_jerry_test
.
cursor
()
cur_jerry_test
=
db_jerry_test
.
cursor
()
# 获取所有用户的设备id
# sql_device_ids = "select distinct cl_id from user_new_tag_log"
# 获取最近30天内的用户设备id
# 获取最近30天内的用户设备id
sql_device_ids
=
"select distinct cl_id from user_new_tag_log "
\
sql_device_ids
=
"select distinct cl_id from user_new_tag_log "
\
"where time > UNIX_TIMESTAMP(DATE_SUB(NOW(), INTERVAL 30 day))"
"where time > UNIX_TIMESTAMP(DATE_SUB(NOW(), INTERVAL 30 day))"
cur_jerry_test
.
execute
(
sql_device_ids
)
cur_jerry_test
.
execute
(
sql_device_ids
)
device_ids_lst
=
[
i
[
0
]
for
i
in
cur_jerry_test
.
fetchall
()]
device_ids_lst
=
[
i
[
0
]
for
i
in
cur_jerry_test
.
fetchall
()]
# 获取所有用户的行为日志
# sql_all_log = "select time,cl_id,score_type,tag_id,tag_referrer,action from user_new_tag_log"
# 获取最近30天内的用户的所有行为
sql_all_log
=
"select time,cl_id,score_type,tag_id,tag_referrer,action from user_new_tag_log where cl_id in "
\
"(select distinct cl_id from user_new_tag_log "
\
"where time > UNIX_TIMESTAMP(DATE_SUB(NOW(), INTERVAL 30 day)))"
cur_jerry_test
.
execute
(
sql_all_log
)
all_log
=
cur_jerry_test
.
fetchall
()
db_jerry_test
.
close
()
db_jerry_test
.
close
()
all_log_df
=
pd
.
DataFrame
(
list
(
all_log
))
all_log_df
.
columns
=
[
"time"
,
"cl_id"
,
"score_type"
,
"tag_id"
,
"tag_referrer"
,
"action"
]
stat_date
=
datetime
.
datetime
.
today
()
.
strftime
(
'
%
Y-
%
m-
%
d'
)
#搜索词及其同义词匹配tag
#搜索词及其同义词匹配tag
all_word_tags
=
get_all_word_tags
()
all_word_tags
=
get_all_word_tags
()
# rdd
# rdd
...
@@ -235,18 +233,9 @@ if __name__ == '__main__':
...
@@ -235,18 +233,9 @@ if __name__ == '__main__':
spark
=
SparkSession
.
builder
.
config
(
conf
=
sparkConf
)
.
enableHiveSupport
()
.
getOrCreate
()
spark
=
SparkSession
.
builder
.
config
(
conf
=
sparkConf
)
.
enableHiveSupport
()
.
getOrCreate
()
spark
.
sparkContext
.
setLogLevel
(
"WARN"
)
spark
.
sparkContext
.
setLogLevel
(
"WARN"
)
device_ids_lst_rdd
=
spark
.
sparkContext
.
parallelize
(
device_ids_lst
)
device_ids_lst_rdd
=
spark
.
sparkContext
.
parallelize
(
device_ids_lst
)
result
=
device_ids_lst_rdd
.
repartition
(
100
)
.
map
(
lambda
x
:
get_user_tag_score
(
x
,
all_
log_df
,
all_
word_tags
))
result
=
device_ids_lst_rdd
.
repartition
(
100
)
.
map
(
lambda
x
:
get_user_tag_score
(
x
,
all_word_tags
))
result
.
collect
()
result
.
collect
()
# result_last = result_rename.withColumn("stat_date", lit(stat_date))
# result_last.show()
# df = result_last.select("stat_date", "cl_id", concat_ws(',', 'tag_list').alias("tag_list"))
# df.show()
# df.write.jdbc(
# mode="overwrite",
# url="jdbc:mysql://172.16.40.158:4000/jerry_test?user=root&password=3SYz54LS9#^9sBvC&useSSL=true",
# table="user_portrait_tags",
# properties={"driver": 'com.mysql.jdbc.Driver'})
except
Exception
as
e
:
except
Exception
as
e
:
send_email
(
"dist_update_portrait_market"
,
"dist_update_portrait_market"
,
"dist_update_portrait_market"
)
send_email
(
"dist_update_portrait_market"
,
"dist_update_portrait_market"
,
"dist_update_portrait_market"
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment