Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
223a1260
Commit
223a1260
authored
5 years ago
by
张彦钊
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
change test file
parent
d592d3f2
master
mr/beta/bug22
offic
rtt
updatedb
zhao
zhao22
1 merge request
!32
新增把esmm排序结果重排
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
150 additions
and
81 deletions
+150
-81
monitor.py
monitor.py
+150
-81
No files found.
monitor.py
View file @
223a1260
...
...
@@ -20,14 +20,7 @@ import datetime
def Json(x):
    """Filter predicate for the Kafka click stream.

    ``x`` is a (key, value) Kafka record whose value is a JSON string.
    Returns True only for 'on_click_card' events coming from the one
    whitelisted test device id; False for everything else — including
    records that are not valid JSON, so the Spark filter never dies on
    a malformed payload (the original let json.loads raise).
    """
    try:
        data = json.loads(x[1])
    except (ValueError, TypeError, IndexError):
        # Malformed record: drop it instead of killing the stream.
        return False
    if 'type' in data and 'device' in data and 'params' in data \
            and 'card_content_type' in data['params']:
        # NOTE(review): the original also special-cased
        # card_content_type == 'diary', but that branch was fully
        # subsumed by this broader on_click_card check, so it is gone.
        if data['type'] == 'on_click_card' \
                and data["device"]["device_id"] == "E417C286-40A4-42F6-BDA9-AEEBD8FEC3B6":
            return True
        else:
            return False
    return False
def filter_na(x):
    """Return True for (key, value) pairs worth keeping.

    Drops records whose key is the sentinel "0" or whose value is
    missing; everything else passes through.
    """
    key, value = x[0], x[1]
    return key != "0" and value is not None
def model(rdd):
    """Build the processing pipeline for one micro-batch RDD.

    Keeps only the events accepted by Json(), spreads them over 10
    partitions, parses each record with get_data() and pushes the
    resulting (device_id, cid, card) triples to redis via
    write_redis(). Returns the transformed RDD, or None if wiring
    the pipeline itself fails.

    The committed diff contained an unconditional early ``return rdd``
    that made the write_redis stage dead code; this keeps the full
    pipeline as the single code path.
    """
    try:
        return (rdd.filter(lambda x: Json(x))
                   .repartition(10)
                   .map(lambda x: get_data(x))
                   .map(lambda x: write_redis(x[0], x[1], x[2])))
    except Exception as e:
        # Was a bare `except: print("fail")` -- keep the best-effort
        # behaviour but surface what actually went wrong.
        print("fail", e)
def get_data(x):
    """Parse one Kafka record into (device_id, card_id, card_type).

    ``x`` is a (key, value) pair whose value is a JSON string carrying
    'device' and 'params' sections. Returns the triple on success and
    None when the record cannot be parsed (the failure is printed,
    matching this script's best-effort logging style).
    """
    try:
        data = json.loads(x[1])
        device_id = data['device']['device_id']
        diary_id = data['params']["card_id"]
        card = data['params']['card_content_type']
        return device_id, diary_id, card
    except Exception as e:
        # The original bound `e` but never surfaced it; print it so a
        # bad payload can actually be diagnosed.
        print("get_data fail", e)
        # send_email("get_data", "get_data", e)
def write_redis(device_id, cid, card):
    """Route one click to the writer matching its card type.

    Card types without a handler are ignored on purpose; every handler
    is invoked for its side effect only, so nothing is returned.
    """
    if card == "user_post":
        tractate_write(device_id, cid)
    elif card == "answer":
        answer_write(device_id, cid)
    elif card == "question":
        question_write(device_id, cid)
    elif card == "diary":
        diary_write(device_id, cid)
def tractate_write(device_id, cid):
    """Record a disliked tractate for a device in redis.

    Looks up the tractate's tag (tag_type '3'), collects every online
    tractate sharing that tag and merges their ids into the device's
    "<device_id>_dislike_tractate" redis key with a 7-day TTL.
    Returns "tractate good" on success, None on failure.
    """
    db = None
    try:
        # NOTE(review): credentials belong in configuration, not code.
        db = pymysql.connect(host='172.16.40.158', port=4000, user='root',
                             passwd='3SYz54LS9#^9sBvC', db='eagle')
        cursor = db.cursor()
        # Parameterized instead of str.format: cid originates from
        # user click events and must not be spliced into SQL text.
        sql = "select b.id from src_mimas_prod_api_tractate_tag a left join src_zhengxing_api_tag b " \
              "on a.tag_id = b.id where b.tag_type = '3' and a.tractate_id = %s"
        cursor.execute(sql, (cid,))
        result = cursor.fetchall()
        # Guard the empty case: result[0][0] used to raise IndexError
        # (silently swallowed by the broad except) for untagged items.
        tags = result[0][0] if result else None
        if tags is not None:
            sql = "select a.id from src_mimas_prod_api_tractate a left join src_mimas_prod_api_tractate_tag b " \
                  "on a.id=b.tractate_id left join src_zhengxing_api_tag c on b.tag_id=c.id " \
                  "where a.is_online = 1 and c.id = %s and c.tag_type = '3'"
            cursor.execute(sql, (tags,))
            result = cursor.fetchall()
            cids = list(set(i[0] for i in result))
            if cids:
                r = redis.Redis(host="172.16.40.135", port=5379, password="")
                key = str(device_id) + "_dislike_tractate"
                if r.exists(key):
                    value = json.loads(r.get(key))
                    value.extend(cids)
                    cids = list(set(value))
                # Single json.dumps: the original double-encoded the
                # merged list (dumps of a dumps), so the next read's
                # extend() iterated the characters of a string.
                r.set(key, json.dumps(cids))
                r.expire(key, 7 * 24 * 60 * 60)
        return "tractate good"
    except Exception as e:
        print("tractate insert redis fail")
        print(e)
    finally:
        # Close on every path; the original leaked the connection
        # whenever tags was None or an exception fired.
        if db is not None:
            db.close()
def answer_write(device_id, cid):
    """Record a disliked answer for a device in redis.

    Looks up the tag (tag_type '3') of the answer's question, collects
    every online answer under the same tag and merges their ids into
    the "<device_id>_dislike_answer" redis key with a 7-day TTL.
    Returns "answer good" on success, None on failure.
    """
    db = None
    try:
        # NOTE(review): credentials belong in configuration, not code.
        db = pymysql.connect(host='172.16.40.158', port=4000, user='root',
                             passwd='3SYz54LS9#^9sBvC', db='eagle')
        cursor = db.cursor()
        # Parameterized instead of str.format: cid originates from
        # user click events and must not be spliced into SQL text.
        sql = "select c.id from src_mimas_prod_api_answer a left join src_mimas_prod_api_questiontag b " \
              "on a.question_id = b.question_id left join src_zhengxing_api_tag c " \
              "on b.tag_id = c.id where c.tag_type = '3' and a.id = %s"
        cursor.execute(sql, (cid,))
        result = cursor.fetchall()
        # Guard the empty case: result[0][0] used to raise IndexError
        # (silently swallowed by the broad except) for untagged items.
        tags = result[0][0] if result else None
        if tags is not None:
            sql = "select a.id from src_mimas_prod_api_answer a left join src_mimas_prod_api_questiontag b " \
                  "on a.question_id = b.question_id left join src_zhengxing_api_tag c on b.tag_id=c.id " \
                  "where a.is_online = 1 and c.id = %s and c.tag_type = '3'"
            cursor.execute(sql, (tags,))
            result = cursor.fetchall()
            cids = list(set(i[0] for i in result))
            if cids:
                r = redis.Redis(host="172.16.40.135", port=5379, password="")
                key = str(device_id) + "_dislike_answer"
                if r.exists(key):
                    value = json.loads(r.get(key))
                    value.extend(cids)
                    cids = list(set(value))
                # Single json.dumps: the original double-encoded the
                # merged list, corrupting the stored value.
                r.set(key, json.dumps(cids))
                r.expire(key, 7 * 24 * 60 * 60)
        return "answer good"
    except Exception as e:
        print("answer insert redis fail")
        print(e)
    finally:
        # Close on every path; the original leaked the connection
        # whenever tags was None or an exception fired.
        if db is not None:
            db.close()
def question_write(device_id, cid):
    """Record a disliked question for a device in redis.

    Looks up the question's tag (tag_type '3'), collects every online
    question under the same tag and merges their ids into the
    "<device_id>_dislike_question" redis key with a 7-day TTL.
    Returns "question good" on success, None on failure.
    """
    db = None
    try:
        # NOTE(review): credentials belong in configuration, not code.
        db = pymysql.connect(host='172.16.40.158', port=4000, user='root',
                             passwd='3SYz54LS9#^9sBvC', db='eagle')
        cursor = db.cursor()
        # Parameterized instead of str.format: cid originates from
        # user click events and must not be spliced into SQL text.
        sql = "select b.id from src_mimas_prod_api_questiontag a left join src_zhengxing_api_tag b " \
              "on a.tag_id = b.id where b.tag_type = '3' and a.question_id = %s"
        cursor.execute(sql, (cid,))
        result = cursor.fetchall()
        # Guard the empty case: result[0][0] used to raise IndexError
        # (silently swallowed by the broad except) for untagged items.
        tags = result[0][0] if result else None
        if tags is not None:
            sql = "select a.id from src_mimas_prod_api_question a left join src_mimas_prod_api_questiontag b " \
                  "on a.id=b.question_id left join src_zhengxing_api_tag c on b.tag_id=c.id " \
                  "where a.is_online = 1 and c.id = %s and c.tag_type = '3'"
            cursor.execute(sql, (tags,))
            result = cursor.fetchall()
            cids = list(set(i[0] for i in result))
            if cids:
                r = redis.Redis(host="172.16.40.135", port=5379, password="")
                key = str(device_id) + "_dislike_question"
                if r.exists(key):
                    value = json.loads(r.get(key))
                    value.extend(cids)
                    cids = list(set(value))
                # Single json.dumps: the original double-encoded the
                # merged list, corrupting the stored value.
                r.set(key, json.dumps(cids))
                r.expire(key, 7 * 24 * 60 * 60)
        return "question good"
    except Exception as e:
        print("question insert redis fail")
        print(e)
    finally:
        # Close on every path; the original leaked the connection
        # whenever tags was None or an exception fired.
        if db is not None:
            db.close()
def
diary_write
(
device_id
,
cid
):
try
:
db
=
pymysql
.
connect
(
host
=
'172.16.40.158'
,
port
=
4000
,
user
=
'root'
,
passwd
=
'3SYz54LS9#^9sBvC'
,
db
=
'eagle'
)
sql
=
"select b.id from src_mimas_prod_api_diary_tags a left join src_zhengxing_api_tag b "
\
"on a.tag_id = b.id where b.tag_type = '3' and a.diary_id = {}"
.
format
(
cid
_list
)
"on a.tag_id = b.id where b.tag_type = '3' and a.diary_id = {}"
.
format
(
cid
)
cursor
=
db
.
cursor
()
cursor
.
execute
(
sql
)
result
=
cursor
.
fetchall
()
...
...
@@ -80,75 +186,39 @@ def write_redis(device_id,cid_list):
cids
=
list
(
set
([
i
[
0
]
for
i
in
result
]))
if
len
(
cids
)
!=
0
:
r
=
redis
.
StrictRedis
.
from_url
(
'redis://:ReDis!GmTx*0aN6@172.16.40.133:6379'
)
r
=
redis
.
Redis
(
host
=
"172.16.40.135"
,
port
=
5379
,
password
=
""
)
key
=
str
(
device_id
)
+
"_dislike_diary"
if
r
.
exists
(
key
):
value
=
eval
(
r
.
get
(
key
))
value
=
json
.
loads
(
r
.
get
(
key
))
value
.
extend
(
cids
)
cids
=
json
.
dumps
(
list
(
set
(
value
)))
r
.
set
(
key
,
json
.
dumps
(
cids
))
else
:
r
.
set
(
key
,
json
.
dumps
(
cids
))
r
.
expire
(
key
,
7
*
24
*
60
*
60
)
return
"good"
return
"
diary
good"
except
Exception
as
e
:
print
(
"insert redis fail"
)
print
(
"
diary
insert redis fail"
)
print
(
e
)
def group_redis(x):
    """Expand a device's disliked diary ids by shared tag and cache in redis.

    ``x`` is a (device_id, iterable-of-diary-ids) pair as produced by
    groupByKey(). Finds the tag_type '3' tags of those diaries, gathers
    every online diary (content_level >= '3') under the same tags, and
    merges the result into "<device_id>_dislike_diary" with a 1-hour TTL.
    """
    device_id = x[0]
    cid_list = list(x[1])
    if not cid_list:
        # Nothing to expand; also avoids an empty SQL IN () clause.
        return
    # NOTE(review): credentials belong in configuration, not code.
    db = pymysql.connect(host='172.16.40.158', port=4000, user='root',
                         passwd='3SYz54LS9#^9sBvC', db='eagle')
    try:
        cursor = db.cursor()
        # Parameterized IN clause: the original's str.format(tuple(...))
        # produced invalid SQL for single-element lists ("in (5,)") and
        # spliced event data straight into the statement.
        placeholders = ",".join(["%s"] * len(cid_list))
        sql = ("select b.id from src_mimas_prod_api_diary_tags a left join src_zhengxing_api_tag b "
               "on a.tag_id = b.id where b.tag_type = '3' and a.diary_id in ({})").format(placeholders)
        cursor.execute(sql, cid_list)
        tags = list(set(i[0] for i in cursor.fetchall()))
        if tags:
            placeholders = ",".join(["%s"] * len(tags))
            sql = ("select a.id from src_mimas_prod_api_diary a left join src_mimas_prod_api_diary_tags b "
                   "on a.id=b.diary_id left join src_zhengxing_api_tag c on b.tag_id=c.id "
                   "where a.is_online = 1 and a.content_level >= '3' "
                   "and c.id in ({}) and c.tag_type = '3'").format(placeholders)
            # The original called db.close() before this second query and
            # then reused the dead cursor; keep the connection open until
            # all queries are done (closed in finally).
            cursor.execute(sql, tags)
            cids = list(set(i[0] for i in cursor.fetchall()))
            if len(cids) > 1:
                r = redis.StrictRedis.from_url('redis://:ReDis!GmTx*0aN6@172.16.40.133:6379')
                key = str(device_id) + "_dislike_diary"
                if r.exists(key):
                    # json.loads instead of eval: never eval() data read
                    # back from an external store.
                    value = json.loads(r.get(key))
                    value.extend(cids)
                    cids = list(set(value))
                # Single json.dumps: the original double-encoded the
                # merged list on this path.
                r.set(key, json.dumps(cids))
                r.expire(key, 60 * 60)
    finally:
        db.close()
# def group_write(rdd):
#     rdd.groupByKey().foreachPartition(lambda x:x.map())
#     return "good"

# ---------------------------------------------------------------------------
# Spark-Streaming-Kafka wiring (script entry point).
# Builds the SparkContext against the nvwa01 standalone master with 4-second
# micro-batches, consumes the "gm-maidian-data" Kafka topic, runs each batch
# through model() and writes grouped results to redis via group_redis().
# ---------------------------------------------------------------------------
sc = SparkContext(conf=SparkConf().setMaster("spark://nvwa01:7077").setAppName("filter").set("spark.io.compression.codec", "lzf"))
ssc = StreamingContext(sc, 4)
sc.setLogLevel("WARN")

# Direct (receiver-less) Kafka consumer configuration.
kafkaParams = {"metadata.broker.list": "172.16.44.25:9092,172.16.44.31:9092,172.16.44.45:9092",
               "group.id": "filter",
               "socket.timeout.ms": "600000",
               "auto.offset.reset": "largest"}

stream = KafkaUtils.createDirectStream(ssc, ["gm-maidian-data"], kafkaParams)
# NOTE(review): foreachRDD returns None, so `transformstream` is always None;
# the assignment is kept for fidelity but the name is unusable afterwards.
transformstream = stream.transform(lambda x: model(x)).foreachRDD(lambda x: x.groupByKey().foreach(group_redis))

# sc = SparkContext(conf=SparkConf().setMaster("spark://nvwa01:7077").setAppName("filter").set("spark.io.compression.codec", "lzf"))
# ssc = StreamingContext(sc,4)
# sc.setLogLevel("WARN")
# kafkaParams = {"metadata.broker.list": "172.16.44.25:9092,172.16.44.31:9092,172.16.44.45:9092",
#                "group.id": "filter",
#                "socket.timeout.ms": "600000",
#                "auto.offset.reset": "largest"}
#
#
# stream = KafkaUtils.createDirectStream(ssc, ["gm-maidian-data"], kafkaParams)
# transformstream = stream.transform(lambda x:model(x))
# transformstream.pprint()
# print(transformstream)

# The streaming start is commented out on purpose in this commit
# ("change test file"): the script currently just smoke-tests
# tractate_write() directly, once with a string id and once with an int.
#
# ssc.start()
# ssc.awaitTermination()
print(tractate_write("hello", "10078"))
print(tractate_write("hello", 37975))
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment