Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
74150978
Commit
74150978
authored
Aug 05, 2020
by
赵威
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
try es
parent
c1824411
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
80 additions
and
20 deletions
+80
-20
es_tool.py
eda/smart_rank/es_tool.py
+12
-2
tag3_update_user_portrait_offline.py
eda/smart_rank/tag3_update_user_portrait_offline.py
+26
-18
tool.py
eda/smart_rank/tool.py
+42
-0
No files found.
eda/smart_rank/es_tool.py
View file @
74150978
from
elasticsearch
import
Elasticsearch
as
Es
import
json
def
get_es
():
...
...
@@ -38,4 +39,14 @@ def es_mquery(doc, body, es=None):
es
=
get_es
()
index
=
es_index_adapt
(
index_prefix
=
'gm-dbmw'
,
doc_type
=
doc
,
rw
=
'read'
)
res
=
es
.
msearch
(
body
,
index
=
index
)
return
res
\ No newline at end of file
return
res
def es_insert_device_info(device_id, body, es=None, rw=None):
    """Insert/overwrite one device document in the gm-dbmw device index via the ES bulk API.

    Args:
        device_id: value used as the document ``_id`` in the bulk action line.
        body: JSON-serializable document body to index.
        es: optional Elasticsearch client; a default client from ``get_es()`` is
            used when None.
        rw: read/write suffix forwarded to ``es_index_adapt`` (default None keeps
            the previous behavior).

    Returns:
        The raw response dict from ``es.bulk``.
    """
    if es is None:
        es = get_es()
    # BUG FIX: the original hard-coded rw=None here, silently ignoring the
    # caller-supplied ``rw`` argument. Forward it instead; the default (None)
    # preserves the old behavior for existing callers.
    index = es_index_adapt(index_prefix="gm-dbmw", doc_type="device", rw=rw)
    # Bulk API payload is NDJSON: one action line, then one source line,
    # each terminated by a newline.
    bulk_head = '{"index": {"_id":"%s"}}' % device_id
    # ensure_ascii=False keeps non-ASCII (e.g. Chinese) tag names readable in ES.
    data_str = json.dumps(body, ensure_ascii=False)
    bulk_one_body = bulk_head + "\n" + data_str + "\n"
    return es.bulk(index=index, doc_type="device", body=bulk_one_body)
eda/smart_rank/tag3_update_user_portrait_offline.py
View file @
74150978
...
...
@@ -4,13 +4,15 @@ import json
import
operator
from
collections
import
Counter
import
redis
import
pymysql
import
redis
from
pyspark
import
SparkConf
from
pyspark.sql
import
SparkSession
from
tool
import
(
get_doris_prod
,
get_jerry_test
,
get_tag3_user_log
,
send_email
,
write_user_portrait
,
write_user_portrait_by_event
,
write_user_portrait_doris
)
from
es_tool
import
es_insert_device_info
from
tool
import
(
get_doris_prod
,
get_jerry_test
,
get_redis_client
,
get_tag3_user_log
,
get_user_portrait_tag3_from_redis
,
get_user_portrait_tag3_with_score
,
send_email
,
write_user_portrait
,
write_user_portrait_by_event
,
write_user_portrait_doris
)
# [{'激光': 1.949194898204873}, {'手术': 1.949194898204873}, {'手术': 1.949194898204873}, {'手术': 1.949194898204873, '植发际线': 7.1}]
...
...
@@ -255,24 +257,28 @@ def update_tag3_user_portrait(cl_id):
res
.
update
(
tmp_res
)
key
=
"doris:user_portrait:tag3:device_id:"
+
str
(
cl_id
)
redis_client
=
redis
.
StrictRedis
.
from_url
(
"redis://:ReDis!GmTx*0aN9@172.16.40.173:6379"
)
redis_client
=
get_redis_client
()
anecdote_tags_scores
=
get_user_portrait_tag3_with_score
(
cl_id
)
.
get
(
"anecdote_tags"
,
{})
print
(
anecdote_tags_scores
)
if
(
len
(
first_demands_score
.
keys
())
>
0
)
or
(
len
(
second_demands_score
.
keys
())
>
0
)
or
\
(
len
(
first_solutions_score
.
keys
())
>
0
)
or
(
len
(
second_solutions_score
.
keys
())
>
0
)
or
\
(
len
(
first_positions_score
.
keys
())
>
0
)
or
(
len
(
second_positions_score
.
keys
())
>
0
)
or
\
(
len
(
projects_score
.
keys
())
>
0
):
redis_client
.
set
(
key
,
json
.
dumps
(
res
))
redis_client
.
expire
(
key
,
60
*
60
*
24
*
180
)
# TODO
# redis_client.set(key, json.dumps(res))
# redis_client.expire(key, 60 * 60 * 24 * 180)
write_user_portrait
(
cl_id
,
","
.
join
(
first_solutions_score
.
keys
()),
","
.
join
(
second_solutions_score
.
keys
()),
","
.
join
(
first_demands_score
.
keys
()),
","
.
join
(
second_demands_score
.
keys
()),
","
.
join
(
first_positions_score
.
keys
()),
","
.
join
(
second_positions_score
.
keys
()),
","
.
join
(
projects_score
.
keys
()))
#
write_user_portrait(cl_id, ",".join(first_solutions_score.keys()), ",".join(second_solutions_score.keys()),
#
",".join(first_demands_score.keys()), ",".join(second_demands_score.keys()),
#
",".join(first_positions_score.keys()), ",".join(second_positions_score.keys()),
#
",".join(projects_score.keys()))
# write_user_portrait_doris(cl_id, ",".join(first_solutions_score.keys()), ",".join(second_solutions_score.keys()),
# ",".join(first_demands_score.keys()), ",".join(second_demands_score.keys()),
# ",".join(first_positions_score.keys()), ",".join(second_positions_score.keys()),
# ",".join(projects_score.keys()))
#
#
write_user_portrait_doris(cl_id, ",".join(first_solutions_score.keys()), ",".join(second_solutions_score.keys()),
#
#
",".join(first_demands_score.keys()), ",".join(second_demands_score.keys()),
#
#
",".join(first_positions_score.keys()), ",".join(second_positions_score.keys()),
#
#
",".join(projects_score.keys()))
return
cl_id
...
...
@@ -299,6 +305,7 @@ def consume_kafka():
spark
=
SparkSession
.
builder
.
config
(
conf
=
sparkConf
)
.
enableHiveSupport
()
.
getOrCreate
()
spark
.
sparkContext
.
setLogLevel
(
"WARN"
)
spark
.
sparkContext
.
addPyFile
(
"/srv/apps/ffm-baseline_git/eda/smart_rank/tool.py"
)
spark
.
sparkContext
.
addPyFile
(
"/srv/apps/ffm-baseline_git/eda/smart_rank/es_tool.py"
)
device_ids_lst_rdd
=
spark
.
sparkContext
.
parallelize
(
device_ids_lst
,
numSlices
=
1000
)
...
...
@@ -319,10 +326,11 @@ if __name__ == "__main__":
start
=
datetime
.
datetime
.
now
()
# update_tag3_user_portrait("androidid_a25a1129c0b38f7b")
#
device_id = "862460044588666"
#
update_tag3_user_portrait(device_id)
device_id
=
"862460044588666"
update_tag3_user_portrait
(
device_id
)
consume_kafka
()
# TODO
# consume_kafka()
end
=
datetime
.
datetime
.
now
()
print
(
end
-
start
)
print
(
"done"
)
eda/smart_rank/tool.py
View file @
74150978
...
...
@@ -503,3 +503,45 @@ def write_user_portrait_by_event(cl_id, first_solutions, second_solutions, first
except
Exception
as
e
:
print
(
"write db error"
)
print
(
e
)
def get_redis_client():
    """Return a StrictRedis client connected to the user-portrait Redis instance.

    NOTE(review): the connection URL (with password) is hard-coded here;
    consider moving it to configuration or an environment variable.
    """
    redis_url = 'redis://:ReDis!GmTx*0aN9@172.16.40.173:6379'
    return redis.StrictRedis.from_url(redis_url)
def get_user_portrait_tag3_from_redis(device_id, limit_score=0):
    """Read a device's cached tag3 portrait from Redis as tag-name lists.

    For each portrait category, tags are ordered by descending score and only
    tags whose score is >= ``limit_score`` are kept.

    Args:
        device_id: device identifier appended to the Redis key prefix.
        limit_score: minimum score a tag must have to be returned (default 0).

    Returns:
        Dict mapping each category name to a list of tag names, or {} when no
        portrait is cached for this device.
    """

    def items_gt_score(tag_scores):
        # Order tags by score (highest first), then keep the names whose
        # score passes the threshold.
        ordered = sorted(tag_scores.items(), key=lambda kv: kv[1], reverse=True)
        return [tag for tag, score in ordered if float(score) >= limit_score]

    portrait_key = "doris:user_portrait:tag3:device_id:" + str(device_id)
    redis_client = get_redis_client()
    if not redis_client.exists(portrait_key):
        return {}
    user_portrait = json.loads(redis_client.get(portrait_key))
    category_names = (
        "first_demands",
        "second_demands",
        "first_solutions",
        "second_solutions",
        "first_positions",
        "second_positions",
        "projects",
        "anecdote_tags",
    )
    return {name: items_gt_score(user_portrait.get(name, {})) for name in category_names}
def get_user_portrait_tag3_with_score(device_id):
    """Fetch the raw tag3 portrait (category -> {tag: score}) for a device.

    Args:
        device_id: device identifier appended to the Redis key prefix.

    Returns:
        The JSON-decoded portrait dict, or {} when nothing is cached.
    """
    cache_key = "doris:user_portrait:tag3:device_id:" + str(device_id)
    client = get_redis_client()
    if not client.exists(cache_key):
        return {}
    return json.loads(client.get(cache_key))
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment