ML / ffm-baseline · Commit 9b94cc89
authored 5 years ago by 张彦钊
change test file
parent a8b447e7
Branches containing this commit: master, mr/beta/bug22, offic, rtt, updatedb, zhao, zhao22
1 merge request: !32 新增把esmm排序结果重排 (add re-ranking of the esmm sort results)
Showing 4 changed files with 620 additions and 40 deletions:

    eda/esmm/Model_pipline/rerank_esmm.py   +223  −0
    monitor.py                              +39   −12
    rerank_esmm.py                          +39   −28
    userProfile.py                          +319  −0
eda/esmm/Model_pipline/rerank_esmm.py (new file, mode 0 → 100644)
import pymysql
import datetime
import json
import redis
import pandas as pd
from sqlalchemy import create_engine


def get_mysql_data(host, port, user, passwd, db, sql):
    db = pymysql.connect(host=host, port=port, user=user, passwd=passwd, db=db)
    cursor = db.cursor()
    cursor.execute(sql)
    result = cursor.fetchall()
    db.close()
    return result


def get_esmm_users():
    # Devices with precise feed exposure yesterday.
    try:
        stat_date = (datetime.date.today() - datetime.timedelta(days=1)).strftime("%Y-%m-%d")
        sql = "select distinct device_id,city_id from data_feed_exposure_precise " \
              "where stat_date = '{}'".format(stat_date)
        result = get_mysql_data('172.16.40.158', 4000, 'root', '3SYz54LS9#^9sBvC', 'jerry_prod', sql)
        result = list(result)
        return result
    except:
        return []


def get_all_users():
    try:
        sql = "select distinct device_id,city_id from esmm_device_diary_queue"
        result = get_mysql_data('172.16.40.158', 4000, 'root', '3SYz54LS9#^9sBvC', 'jerry_test', sql)
        result = list(result)
        return result
    except:
        return []


def get_user_profile(device_id, top_k=5):
    # Return the top_k highest-scoring portrait tag ids for a device.
    # name_tag is the global tag-name -> tag-id map built in __main__.
    try:
        r = redis.Redis(host="172.16.40.135", port=5379, password="", db=2)
        key = "user:portrait_tags:cl_id:" + str(device_id)
        if r.exists(key):
            tmp = json.loads(r.get(key).decode('utf-8'))
            tag_score = {}
            for i in tmp:
                if i["type"] == "tag":
                    tag_score[i["content"]] = i["score"]
                elif i["content"] in name_tag.keys():
                    tag_score[name_tag[i["content"]]] = i["score"]
            tag_sort = sorted(tag_score.items(), key=lambda x: x[1], reverse=True)
            tags = []
            if len(tag_sort) > top_k:
                for i in range(top_k):
                    tags.append(tag_sort[i][0])
            else:
                for i in tag_sort:
                    tags.append(i[0])
            return tags
        else:
            return []
    except:
        return []


def get_searchworlds_to_tagid():
    try:
        sql = 'select id, name from api_tag where is_online = 1 and tag_type < 4'
        tag_id = get_mysql_data('172.16.30.141', 3306, 'work', 'BJQaT9VzDcuPBqkd', 'zhengxing', sql)
        searchworlds_to_tagid = {}
        for i in tag_id:
            searchworlds_to_tagid[i[1]] = i[0]
        return searchworlds_to_tagid
    except Exception as e:
        return {}


def get_queues(device_id, city_id):
    try:
        db = pymysql.connect(host='172.16.40.158', port=4000, user='root',
                             passwd='3SYz54LS9#^9sBvC', db='jerry_test')
        cursor = db.cursor()
        sql = "select native_queue, nearby_queue, nation_queue, megacity_queue from esmm_device_diary_queue " \
              "where device_id = '{}' and city_id = '{}'".format(device_id, city_id)
        cursor.execute(sql)
        result = cursor.fetchone()
        db.close()
        if result is not None:
            return list(result)
        else:
            return []
    except:
        return []


def tag_boost(cid_str, tag_list):
    # Re-rank a comma-separated diary-id queue: diaries matching the user's
    # portrait tags are interleaved one per tag per round, ahead of the
    # unmatched ("right") diaries.
    if cid_str is not None and cid_str != "":
        cids = cid_str.split(",")
        try:
            if len(cids) > 6 and len(tag_list) > 0:
                sql = "select id,group_concat(diary_id) from " \
                      "(select a.diary_id,b.id from src_mimas_prod_api_diary_tags a left join src_zhengxing_api_tag b " \
                      "on a.tag_id = b.id where b.tag_type < '4' and a.diary_id in {}) tmp " \
                      "where id in {} group by id".format(tuple(cids), tuple(tag_list))
                result = get_mysql_data('172.16.40.158', 4000, 'root', '3SYz54LS9#^9sBvC', 'eagle', sql)
                if len(result) > 0:
                    tag_cids = {}
                    left_cids = []
                    for i in result:
                        tmp = i[1].split(",")
                        tmp = [i for i in cids if i in tmp]
                        tag_cids[i[0]] = tmp
                        left_cids.extend(tmp)
                    left_cids = list(set(left_cids))
                    right_cids = [i for i in cids if i not in left_cids]
                    tag_cids["right"] = right_cids
                    tag_list.append("right")
                    sort_cids = []
                    n = 0
                    while n != len(tag_cids) - 1:
                        for i in tag_list:
                            if i in tag_cids.keys():
                                if len(tag_cids[i]) > 0:
                                    sort_cids.append(tag_cids[i][0])
                                    value = tag_cids[i]
                                    value.pop(0)
                                    tag_cids[i] = value
                                    if len(value) == 0 and i != "right":
                                        n = n + 1
                    if len(tag_cids["right"]) > 0:
                        sort_cids.extend(tag_cids["right"])
                    news_ids = []
                    for id in sort_cids:
                        if id not in news_ids:
                            news_ids.append(id)
                    new_str = ",".join([str(i) for i in news_ids])
                    return new_str
                else:
                    return cid_str
            else:
                return cid_str
        except:
            # TODO: report to Sentry, and also write a local log
            return cid_str
    else:
        return cid_str


def to_data_base(df):
    sql = "select distinct device_id from esmm_resort_diary_queue"
    result = get_mysql_data('172.16.40.158', 4000, 'root', '3SYz54LS9#^9sBvC', 'jerry_test', sql)
    old_uid = [i[0] for i in result]
    if len(old_uid) > 0:
        old_uid = set(df["device_id"].values) & set(old_uid)
        old_number = len(old_uid)
        if old_number > 0:
            # Delete rows for devices that are about to be re-inserted.
            db = pymysql.connect(host='172.16.40.158', port=4000, user='root',
                                 passwd='3SYz54LS9#^9sBvC', db='jerry_test')
            sql = "delete from esmm_resort_diary_queue where device_id in {}".format(tuple(old_uid))
            cursor = db.cursor()
            cursor.execute(sql)
            db.commit()
            cursor.close()
            db.close()
    yconnect = create_engine('mysql+pymysql://root:3SYz54LS9#^9sBvC@172.16.40.158:4000/jerry_test?charset=utf8')
    pd.io.sql.to_sql(df, "esmm_resort_diary_queue", yconnect, schema='jerry_test',
                     if_exists='append', index=False, chunksize=200)
    print("insert done")


if __name__ == "__main__":
    users_list = get_esmm_users()
    print("user number")
    print(len(users_list))
    name_tag = get_searchworlds_to_tagid()
    n = 1000
    split_users_list = [users_list[i:i + n] for i in range(0, len(users_list), n)]
    for child_users_list in split_users_list:
        total_samples = list()
        for uid_city in child_users_list:
            tag_list = get_user_profile(uid_city[0])
            queues = get_queues(uid_city[0], uid_city[1])
            if len(queues) > 0 and len(tag_list) > 0:
                new_native = tag_boost(queues[0], tag_list)
                new_nearby = tag_boost(queues[1], tag_list)
                insert_time = str(datetime.datetime.now().strftime('%Y%m%d%H%M'))
                sample = [uid_city[0], uid_city[1], new_native, new_nearby, queues[2], queues[3], insert_time]
                total_samples.append(sample)
        if len(total_samples) > 0:
            df = pd.DataFrame(total_samples)
            df = df.rename(columns={0: "device_id", 1: "city_id", 2: "native_queue",
                                    3: "nearby_queue", 4: "nation_queue", 5: "megacity_queue", 6: "time"})
            to_data_base(df)
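The round-robin interleave inside tag_boost is easier to follow on toy data. Below is a minimal, self-contained sketch of just that step, with the MySQL tag lookup replaced by a hard-coded tag -> diary-id mapping; all ids and tag names here are invented for illustration:

    # Round-robin re-ranking as in tag_boost(), on fake data.
    tag_list = ["double_eyelid", "rhinoplasty", "right"]
    tag_cids = {
        "double_eyelid": ["11", "12"],   # diaries matching the user's first tag
        "rhinoplasty": ["13"],           # diaries matching the second tag
        "right": ["14", "15"],           # diaries that matched no portrait tag
    }

    sort_cids = []
    n = 0
    while n != len(tag_cids) - 1:        # stop once every non-"right" bucket is drained
        for tag in tag_list:
            if tag in tag_cids and len(tag_cids[tag]) > 0:
                sort_cids.append(tag_cids[tag].pop(0))
                if len(tag_cids[tag]) == 0 and tag != "right":
                    n += 1
    if len(tag_cids["right"]) > 0:       # leftovers of the unmatched bucket go last
        sort_cids.extend(tag_cids["right"])

    print(",".join(sort_cids))           # 11,13,14,12,15

Each pass emits at most one diary per tag, so higher-priority tags surface first within every round while unmatched diaries trail behind.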
monitor.py
@@ -176,21 +176,48 @@ def diary_write(device_id, cid):

        print(e)

# sc = SparkContext(conf=SparkConf().setMaster("spark://nvwa01:7077").setAppName("dislike").set("spark.io.compression.codec", "lzf"))
# ssc = StreamingContext(sc, 4)
# sc.setLogLevel("WARN")
# kafkaParams = {"metadata.broker.list": "172.16.44.25:9092,172.16.44.31:9092,172.16.44.45:9092",
#                "group.id": "dislike",
#                "socket.timeout.ms": "600000",
#                "auto.offset.reset": "largest"}
#
# stream = KafkaUtils.createDirectStream(ssc, ["gm-maidian-data"], kafkaParams)
# transformstream = stream.transform(lambda x: model(x))
# transformstream.pprint()
#
# ssc.start()
# ssc.awaitTermination()


def make_data(device_id, city_id):
    # Write a hand-built re-ranked diary queue for one device into the test redis.
    r = redis.StrictRedis.from_url("redis://redis.paas-test.env:6379/2")
    key = "device_diary_queue_rerank:device_id:" + device_id + ":city_id:" + city_id
    r.hset(name=key, key="native_queue", value=native)
    r.hset(name=key, key="nearby_queue", value=nearby)
    r.hset(name=key, key="nation_queue", value=nation)
    r.hset(name=key, key="megacity_queue", value=megacity)
    print(r.hgetall(key))


if __name__ == "__main__":
    native = ",".join([str(i) for i in range(2, 6)])
    nearby = ",".join([str(i) for i in range(6, 10)])
    nation = ",".join([str(i) for i in range(10, 13)])
    megacity = ",".join([str(i) for i in range(13, 16)])
    make_data("hello", "beijing")
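Worth remembering when reading this hash back: redis-py returns hgetall results as a dict of bytes keys and values, so a consumer has to decode and split the queue strings itself. A minimal sketch on a hard-coded dict shaped like the output printed by make_data above (values mirror the ranges in __main__):

    # Decode a device_diary_queue_rerank hash as returned by redis-py's hgetall().
    raw = {
        b"native_queue": b"2,3,4,5",
        b"nearby_queue": b"6,7,8,9",
        b"nation_queue": b"10,11,12",
        b"megacity_queue": b"13,14,15",
    }
    queues = {k.decode("utf-8"): [int(i) for i in v.decode("utf-8").split(",")]
              for k, v in raw.items()}
    print(queues["native_queue"])  # [2, 3, 4, 5]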
rerank_esmm.py
@@ -109,6 +109,7 @@ def tag_boost(cid_str, tag_list):
                    left_cids = list(set(left_cids))
                    right_cids = [i for i in cids if i not in left_cids]
                    tag_cids["right"] = right_cids
                    print(tag_cids)
                    tag_list.append("right")
                    sort_cids = []
                    n = 0
@@ -171,36 +172,46 @@ def to_data_base(df):
if __name__ == "__main__":
    users_list = get_esmm_users()
    print("user number")
    print(len(users_list))

    # users_list = get_esmm_users()
    # print("user number")
    # print(len(users_list))
    #
    # name_tag = get_searchworlds_to_tagid()
    # n = 1000
    # split_users_list = [users_list[i:i + n] for i in range(0, len(users_list), n)]
    # for child_users_list in split_users_list:
    #     total_samples = list()
    #     for uid_city in child_users_list:
    #         tag_list = get_user_profile(uid_city[0])
    #         queues = get_queues(uid_city[0], uid_city[1])
    #         if len(queues) > 0 and len(tag_list) > 0:
    #             new_native = tag_boost(queues[0], tag_list)
    #             new_nearby = tag_boost(queues[1], tag_list)
    #
    #             insert_time = str(datetime.datetime.now().strftime('%Y%m%d%H%M'))
    #             sample = [uid_city[0], uid_city[1], new_native, new_nearby, queues[2], queues[3], insert_time]
    #             total_samples.append(sample)
    #
    #     if len(total_samples) > 0:
    #         df = pd.DataFrame(total_samples)
    #         df = df.rename(columns={0: "device_id", 1: "city_id", 2: "native_queue",
    #                                 3: "nearby_queue", 4: "nation_queue", 5: "megacity_queue", 6: "time"})
    #
    #         to_data_base(df)

    name_tag = get_searchworlds_to_tagid()
    n = 600
    tag_list = get_user_profile("00C6F623-297E-4608-9988-9774F503639C")
    queues = get_queues("00C6F623-297E-4608-9988-9774F503639C", "nanchang")
    split_users_list = [users_list[i:i + n] for i in range(0, len(users_list), n)]
    print("用户画像标签:")  # user-portrait tags for the hard-coded test device
    print(tag_list)
    for child_users_list in split_users_list:
        total_samples = list()
        for uid_city in child_users_list:
            tag_list = get_user_profile(uid_city[0])
            queues = get_queues(uid_city[0], uid_city[1])
            if len(queues) > 0 and len(tag_list) > 0:
                print("排序前:")  # before re-ranking
                print(queues[0])
                new_native = tag_boost(queues[0], tag_list)
                print("排序后:")  # after re-ranking
                print(new_native)
                new_nearby = tag_boost(queues[1], tag_list)
                insert_time = str(datetime.datetime.now().strftime('%Y%m%d%H%M'))
                sample = [uid_city[0], uid_city[1], new_native, new_nearby, queues[2], queues[3], insert_time]
                total_samples.append(sample)
        print(len(total_samples))
        if len(total_samples) > 0:
            df = pd.DataFrame(total_samples)
            df = df.rename(columns={0: "device_id", 1: "city_id", 2: "native_queue",
                                    3: "nearby_queue", 4: "nation_queue", 5: "megacity_queue", 6: "time"})
            to_data_base(df)
            print("good boy")
            # TODO to kv
userProfile.py
@@ -143,3 +143,322 @@ def fetch_user_profile(device_id):
    for i in user_profile.columns:
        user_profile_dict[i] = user_profile.loc[0, i]
    return user_profile_dict, False
def fetch_qa(device_id, card_type, size):
    try:
        key = '{device_id}-{card_type}-{date}'.format(device_id=device_id, card_type=card_type,
                                                      date=RecommendFeed.current_date())
        if (device_id != '0'):
            search_qa_recommend_key = "TS:search_recommend_answer_queue:device_id:" + str(device_id)
            search_qa_recommend_list = list()
            search_cursor_ts = 0
            if redis_client.exists(search_qa_recommend_key):
                search_qa_recommend_dict = redis_client.hgetall(search_qa_recommend_key)
                if b'cursor' in search_qa_recommend_dict:
                    search_cursor_ts = json.loads(search_qa_recommend_dict[b'cursor'])
                if search_cursor_ts < 10:
                    search_qa_recommend_list = json.loads(search_qa_recommend_dict[b'answer_queue'])
                    if search_cursor_ts < len(search_qa_recommend_list):
                        size = size - 1
        try:
            que = DeviceQAQueue.objects.get(device_id=device_id)
        except DeviceQAQueue.DoesNotExist:
            que = AnswerQueue.objects.last()
        if not que:
            return []
        que = list(filter(None, que.queue.split(',')))
        # adjust args.
        cursor = redis_client.get(key) or 0
        cursor = int(cursor) % len(que)
        size = min(size, len(que))
        # redis_client.set(key, cursor + size, ex=24 * 60 * 60)
        data = list(islice(cycle(que), cursor, cursor + size))
        data = list(map(int, data))
        if cursor + 2 * size < len(que):
            redis_client.set(key, cursor + size, ex=24 * 60 * 60)
        else:
            try:
                context.request_logger.app(reset_answer_queue=True)
                cursor = 0
                redis_client.set(key, cursor, ex=24 * 60 * 60)
            except:
                redis_client.set(key, cursor + size, ex=24 * 60 * 60)
        if device_id != '0':
            if len(search_qa_recommend_list) > 0 and search_cursor_ts < len(search_qa_recommend_list):
                queue = search_qa_recommend_list[search_cursor_ts:search_cursor_ts + 1]
                queue.extend(data)
                data = queue
                new_search_cursor = search_cursor_ts + 1
                redis_client.hset(search_qa_recommend_key, 'cursor', new_search_cursor)
                redis_client.expire(search_qa_recommend_key, 30 * 24 * 60 * 60)
            read_qa_key = "TS:recommend_answer_set:device_id:" + str(device_id)
            if len(data) > 0:
                redis_client.sadd(read_qa_key, *data)
        return data
    except:
        logging_exception()
        return []
def fetch_user_topic(device_id, card_type, size):
    try:
        key = '{device_id}-{card_type}-{date}'.format(device_id=device_id, card_type=card_type,
                                                      date=RecommendFeed.current_date())
        if (device_id != '0') and size >= 2:
            search_topic_recommend_key = "TS:search_recommend_tractate_queue:device_id:" + str(device_id)
            search_topic_recommend_list = list()
            search_cursor_ts = 0
            if redis_client.exists(search_topic_recommend_key):
                search_topic_recommend_dict = redis_client.hgetall(search_topic_recommend_key)
                if b'cursor' in search_topic_recommend_dict:
                    search_cursor_ts = json.loads(search_topic_recommend_dict[b'cursor'])
                if search_cursor_ts < 30:
                    search_topic_recommend_list = json.loads(search_topic_recommend_dict[b'tractate_queue'])
                    if search_cursor_ts < len(search_topic_recommend_list):
                        size = size - 2
        try:
            que = DeviceUserTopicQueue.objects.get(device_id=device_id)
        except DeviceUserTopicQueue.DoesNotExist:
            que = UserTopicQueue.objects.last()
        if not que:
            return []
        que = list(filter(None, que.queue.split(',')))
        # adjust args.
        cursor = redis_client.get(key) or 0
        cursor = int(cursor) % len(que)
        size = min(size, len(que))
        data = list(islice(cycle(que), cursor, cursor + size))
        data = list(map(int, data))
        if cursor + 2 * size < len(que):
            redis_client.set(key, cursor + size, ex=24 * 60 * 60)
        else:
            try:
                context.request_logger.app(reset_queue=True)
                cursor = 0
                redis_client.set(key, cursor, ex=24 * 60 * 60)
            except:
                redis_client.set(key, cursor + size, ex=24 * 60 * 60)
        if device_id != '0' and size >= 2:
            if len(search_topic_recommend_list) > 0 and search_cursor_ts < len(search_topic_recommend_list):
                queue = search_topic_recommend_list[search_cursor_ts:search_cursor_ts + 2]
                queue.extend(data)
                data = queue
                new_search_cursor = search_cursor_ts + 2
                redis_client.hset(search_topic_recommend_key, 'cursor', new_search_cursor)
                redis_client.expire(search_topic_recommend_key, 30 * 24 * 60 * 60)
            read_topic_key = "TS:recommend_tractate_set:device_id:" + str(device_id)
            if len(data) > 0:
                redis_client.sadd(read_topic_key, *data)
        return data
    except:
        logging_exception()
        return []
def fetch_diary(cls, device_id, card_type, city_id, size):
    # first, we fetch data from personal-queue city-queue, if not both, get data
    # from world queue.
    user_portrait_diary_part_list = list()
    click_diary_size = 1
    search_diary_size = 4
    if device_id != '0':
        user_portrait_diary_key = 'user_portrait_recommend_diary_queue:device_id:%s:%s' % \
                                  (device_id, datetime.datetime.now().strftime('%Y-%m-%d'))
        if redis_client.exists(user_portrait_diary_key):
            user_portrait_diary_dict = redis_client.hgetall(user_portrait_diary_key)
            user_portrait_cursor = str(user_portrait_diary_dict[b'cursor'], encoding='utf-8')
            if user_portrait_cursor == '0':
                if b'len_cursor' in user_portrait_diary_dict.keys():
                    user_portrait_diary_list = json.loads(user_portrait_diary_dict[b'diary_queue'])
                    len_cursor = str(user_portrait_diary_dict[b'len_cursor'], encoding='utf-8')
                    len_cursor = int(len_cursor)
                    if len(user_portrait_diary_list) - len_cursor > size:
                        user_portrait_diary_part_list = user_portrait_diary_list[len_cursor:len_cursor + size]
                        redis_client.hset(user_portrait_diary_key, 'len_cursor', len_cursor + size)
                        size = 0
                    else:
                        user_portrait_diary_list = json.loads(user_portrait_diary_dict[b'diary_queue'])
                        diary_list_len = len(user_portrait_diary_list) - len_cursor
                        size = size - diary_list_len
                        user_portrait_diary_part_list = user_portrait_diary_list[len_cursor:len_cursor + diary_list_len]
                        redis_client.hset(user_portrait_diary_key, 'len_cursor', len_cursor + diary_list_len)
                        user_portrait_cursor = int(user_portrait_cursor) + 1
                        redis_client.hset(user_portrait_diary_key, 'cursor', user_portrait_cursor)
                else:
                    user_portrait_diary_part_list = json.loads(user_portrait_diary_dict[b'diary_queue'])
                    size = size - len(user_portrait_diary_part_list)
                    user_portrait_cursor = int(user_portrait_cursor) + 1
                    redis_client.hset(user_portrait_diary_key, 'cursor', user_portrait_cursor)

    try:
        # obj = DeviceDiaryQueue.objects.filter(device_id=device_id, city_id=city_id).first()
        (local, nearby, nation, megacity, city_id) = cls.fetch_device_diary_queue_data(city_id, device_id)
        if len(local) == 0 and len(nearby) == 0 and len(nation) == 0 and len(megacity) == 0:
            (local, nearby, nation, megacity, city_id) = cls.fetch_diary_queue_data(city_id)
        # if not obj:
        #     (local, nearby, nation, megacity,city_id) = cls.fetch_diary_queue_data(city_id)
        # else:
        #     local = list(filter(None, obj.native_queue.split(','))) if obj.native_queue else []
        #     nearby = list(filter(None, obj.nearby_queue.split(','))) if obj.nearby_queue else []
        #     nation = list(filter(None, obj.nation_queue.split(','))) if obj.nation_queue else []
        #     megacity = list(filter(None, obj.megacity_queue.split(','))) if obj.megacity_queue else []
    except:
        logging_exception()
        (local, nearby, nation, megacity, city_id) = cls.fetch_diary_queue_data(city_id)

    if (device_id != '0'):
        search_diary_recommend_key = "TS:search_recommend_diary_queue:device_id:" + str(device_id)
        search_diary_recommend_list = list()
        search_cursor_ts = 0
        if redis_client.exists(search_diary_recommend_key) and size > 3:
            search_diary_recommend_dict = redis_client.hgetall(search_diary_recommend_key)
            if b'cursor' in search_diary_recommend_dict:
                search_cursor_ts = json.loads(search_diary_recommend_dict[b'cursor'])
            search_diary_recommend_list = json.loads(search_diary_recommend_dict[b'diary_queue'])
            if search_cursor_ts + search_diary_size < len(search_diary_recommend_list):
                size = size - search_diary_size

    if (device_id != '0'):
        diary_recommend_key = "TS:recommend_diary_queue:device_id:" + str(device_id)
        diary_recommend_list = list()
        if redis_client.exists(diary_recommend_key) and size > 0:
            diary_recommend_dict = redis_client.hgetall(diary_recommend_key)
            diary_recommend_list = json.loads(diary_recommend_dict[b'diary_queue'])
            if len(diary_recommend_list) > 0:
                size = size - click_diary_size

    key = '{device_id}-{city_id}-{date}'.format(device_id=device_id, city_id=city_id,
                                                date=RecommendFeed.current_date())
    # strategy rule: when user refreshes over 30 loadings, reset native/nearby/nation queue cursor.
    counter_key = key + '-counter_v1'
    counter = redis_client.incr(counter_key)
    if counter == 1:
        redis_client.expire(counter_key, 24 * 60 * 60)

    cursor_key = key + '-cursor_v1'
    cursor = redis_client.get(cursor_key) or b'0-0-0-0'
    # if counter > 30:
    #     cursor = b'0-0-0-0'
    #     redis_client.delete(counter_key)

    cx, cy, cm, cz = map(int, cursor.split(b'-'))
    x, y, m, z = cls.get_city_scale(city_id)
    data, ncx, ncy, ncm, ncz = cls.get_scale_data(
        local, nearby, nation, megacity,
        cx, cy, cm, cz,
        x, y, z, m, size
    )
    if ncx == cx and ncy == cy:  # native queue and nearby queue reached their end
        logger.info("diary queue reach end,cx:%d,cy:%d,cm:%d,cz:%d", cx, cy, cm, cz)
        # redis_client.delete(counter_key)
        # data, ncx, ncy, ncm, ncz = cls.get_scale_data(
        #     local, nearby, nation, megacity,
        #     0, 0, 0, 0,
        #     x, y, z, m, size
        # )
        ncx = ncy = ncm = ncz = 0
    val = '-'.join(map(str, [ncx, ncy, ncm, ncz]))
    redis_client.set(cursor_key, val, ex=24 * 60 * 60)
    data = list(map(int, data))

    if device_id != '0':
        if search_cursor_ts < len(search_diary_recommend_list) - search_diary_size:
            queue = search_diary_recommend_list[search_cursor_ts:search_cursor_ts + search_diary_size]
            queue.extend(data)
            data = queue
            new_search_cursor = search_cursor_ts + search_diary_size
            redis_client.hset(search_diary_recommend_key, 'cursor', new_search_cursor)
            redis_client.expire(search_diary_recommend_key, 30 * 24 * 60 * 60)
        if len(diary_recommend_list) > 0:
            diary_id = diary_recommend_list.pop(0)
            data.insert(0, diary_id)
            if len(diary_recommend_list) > 0:
                diary_recommend_list_json = json.dumps(diary_recommend_list)
                redis_client.hset(diary_recommend_key, 'diary_queue', diary_recommend_list_json)
                redis_client.expire(diary_recommend_key, 30 * 24 * 60 * 60)
            else:
                redis_client.delete(diary_recommend_key)
        if len(user_portrait_diary_part_list) > 0:
            user_portrait_diary_part_list.extend(data)
            data = user_portrait_diary_part_list
        # mark everything returned as already read (已读)
        read_diary_key = "TS:recommend_diary_set:device_id:" + str(device_id)
        if len(data) > 0:
            redis_client.sadd(read_diary_key, *data)
    return data
def get_scale_data(local, nearby, nation, megacity, cx, cy, cm, cz, x, y, z, m, size):
    """
    :param local: local diary queue
    :param nearby: nearby diary queue
    :param nation: nation diary queue
    :param megacity: megacity diary queue
    :param cx: seen local diary offset
    :param cy: seen nearby diary offset
    :param cz: seen nation diary offset
    :param cm: seen megacity diary offset
    :param x: local diary scale factor
    :param y: nearby diary scale factor
    :param z: nation diary scale factor
    :param m: megacity diary scale factor
    :param size: number of diaries
    :return:
    """
    # All four tiers (local, nearby, megacity, nation) are sized by rounding.
    # To address the problems this caused, the fixes in this change are:
    # 1. If two tiers round to zero and slots are left over, give one slot to the
    #    zero tier with the highest priority, in the order local > nearby > nation.
    # 2. If no tier is zero and slots are left over, give one slot to the tier
    #    with the largest weight share first.
    # 3. If exactly one tier is zero and slots are left over, fill one slot for
    #    the tier with the largest weight share first.
    nx = int(round(x * 1.0 / (x + y + z + m) * size))
    ny = int(round(y * 1.0 / (x + y + z + m) * size))
    nz = int(round(z * 1.0 / (x + y + z + m) * size))
    nm = int(round(m * 1.0 / (x + y + z + m) * size))
    nxyz = [nx, ny, nm, nz]
    xyz = [x, y, m, z]
    counter = Counter([nx, ny, nm, nz])
    if counter[0] == 2:
        nxyz[nxyz.index(0)] += size - sum(nxyz)
    else:
        nxyz[xyz.index(max(xyz))] += size - sum(nxyz)
    nx, ny, nm, nz = nxyz
    slocal = local[cx:cx + nx]
    cx = min(cx + nx, len(local))
    ny += (nx - len(slocal))  # pass any shortfall down to the next tier
    snearby = nearby[cy:cy + ny]
    cy = min(cy + ny, len(nearby))
    nm += (ny - len(snearby))
    smegacity = megacity[cm:cm + nm]
    cm = min(cm + nm, len(megacity))
    nz += (nm - len(smegacity))
    snation = nation[cz:cz + nz]
    cz = min(cz + nz, len(nation))
    return chain(slocal, snearby, smegacity, snation), cx, cy, cm, cz
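The rounding-and-correction rule at the top of get_scale_data is easiest to verify with concrete numbers. A minimal sketch of just the quota step, using made-up weights (four equal tiers and a 9-slot page):

    from collections import Counter

    x, y, z, m = 1, 1, 1, 1   # local / nearby / nation / megacity weights (illustrative)
    size = 9

    nx = int(round(x * 1.0 / (x + y + z + m) * size))   # each tier rounds to 2
    ny = int(round(y * 1.0 / (x + y + z + m) * size))
    nz = int(round(z * 1.0 / (x + y + z + m) * size))
    nm = int(round(m * 1.0 / (x + y + z + m) * size))

    nxyz = [nx, ny, nm, nz]
    xyz = [x, y, m, z]
    if Counter(nxyz)[0] == 2:
        nxyz[nxyz.index(0)] += size - sum(nxyz)           # rule 1: top up the first zero tier
    else:
        nxyz[xyz.index(max(xyz))] += size - sum(nxyz)     # rules 2/3: top up the heaviest tier

    print(nxyz)  # [3, 2, 2, 2] -> the leftover ninth slot goes to the local tier

Note the correction can also subtract: if rounding over-allocates, size - sum(nxyz) is negative and the heaviest tier gives a slot back.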
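fetch_qa and fetch_user_topic above both page through their queues circularly via islice(cycle(que), cursor, cursor + size). A tiny self-contained example of that wrap-around (ids are invented):

    from itertools import cycle, islice

    que = ["101", "102", "103", "104", "105"]  # queue as stored: comma-split strings
    cursor = 3                                 # position saved in redis from the last page
    size = 4

    page = list(map(int, islice(cycle(que), cursor, cursor + size)))
    print(page)  # [104, 105, 101, 102] -- the read wraps past the end of the queue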