Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
P
physical
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
钟尚武
physical
Commits
a5ccccd8
Commit
a5ccccd8
authored
Apr 11, 2019
by
段英荣
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
modify huabao bug
parent
877571ac
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
89 additions
and
69 deletions
+89
-69
tasks.py
injection/data_sync/tasks.py
+15
-11
register_user_tag.py
linucb/utils/register_user_tag.py
+49
-39
collect_data.py
linucb/views/collect_data.py
+8
-6
linucb.py
linucb/views/linucb.py
+7
-2
pictorial.py
trans2es/models/pictorial.py
+1
-1
topic.py
trans2es/models/topic.py
+8
-9
pictorial_transfer.py
trans2es/utils/pictorial_transfer.py
+1
-1
No files found.
injection/data_sync/tasks.py
View file @
a5ccccd8
...
...
@@ -11,22 +11,26 @@ import traceback
from
libs.cache
import
redis_client
from
trans2es.models.face_user_contrast_similar
import
FaceUserContrastSimilar
,
UserSimilarScore
import
json
from
linucb.utils.register_user_tag
import
RegisterUserTag
@shared_task
def
write_to_es
(
es_type
,
pk_list
,
use_batch_query_set
=
False
):
try
:
pk_list
=
list
(
frozenset
(
pk_list
))
type_info_map
=
get_type_info_map
()
type_info
=
type_info_map
[
es_type
]
logging
.
info
(
"consume es_type:
%
s"
%
str
(
es_type
))
type_info
.
insert_table_by_pk_list
(
sub_index_name
=
es_type
,
pk_list
=
pk_list
,
use_batch_query_set
=
use_batch_query_set
,
es
=
ESPerform
.
get_cli
()
)
if
es_type
==
"register_user_tag"
:
RegisterUserTag
.
get_register_user_tag
(
pk_list
)
else
:
type_info_map
=
get_type_info_map
()
type_info
=
type_info_map
[
es_type
]
logging
.
info
(
"consume es_type:
%
s"
%
str
(
es_type
))
type_info
.
insert_table_by_pk_list
(
sub_index_name
=
es_type
,
pk_list
=
pk_list
,
use_batch_query_set
=
use_batch_query_set
,
es
=
ESPerform
.
get_cli
()
)
except
:
logging
.
error
(
"catch exception,err_msg:
%
s"
%
traceback
.
format_exc
())
...
...
linucb/utils/register_user_tag.py
View file @
a5ccccd8
...
...
@@ -28,54 +28,64 @@ class RegisterUserTag(object):
linucb_device_id_register_tag_topic_id_prefix
=
"physical:linucb:register_tag_topic_recommend:device_id:"
linucb_user_id_register_tag_topic_id_prefix
=
"physical:linucb:register_tag_topic_recommend:user_id:"
linucb_register_user_tag_key
=
"physical:linucb:register_user_tag_info"
@classmethod
def
get_register_user_tag
(
cls
,
pk_list
):
try
:
user_id_set
=
set
()
# user_id_set = set()
user_id_dict
=
dict
()
query_results
=
AccountUserTag
.
objects
.
filter
(
pk__in
=
pk_list
)
for
item
in
query_results
:
tag_id
=
item
.
tag_id
user_id
=
item
.
user
if
user_id
not
in
user_id_set
:
user_id_set
.
add
(
user_id
)
user_tag_list
=
AccountUserTag
.
objects
.
filter
(
user
=
user_id
)
.
values_list
(
"tag_id"
,
flat
=
True
)
have_read_topic_id_list
=
Tools
.
get_have_read_topic_id_list
(
-
1
,
user_id
,
TopicPageType
.
HOME_RECOMMEND
)
recommend_topic_id_list
=
list
()
cycle_num
=
int
(
10000
/
len
(
user_tag_list
))
for
index
in
range
(
0
,
cycle_num
):
for
tag_id
in
user_tag_list
:
redis_tag_id_key
=
cls
.
tag_topic_id_redis_prefix
+
str
(
tag_id
)
redis_tag_id_data
=
redis_client
.
get
(
redis_tag_id_key
)
tag_topic_id_list
=
json
.
loads
(
redis_tag_id_data
)
if
redis_tag_id_data
else
[]
if
not
redis_tag_id_data
:
tag_topic_id_list
=
ESPerform
.
get_tag_topic_list
(
tag_id
)
redis_client
.
set
(
redis_tag_id_key
,
json
.
dumps
(
tag_topic_id_list
))
redis_client
.
expire
(
redis_tag_id_key
,
1
*
24
*
60
*
60
)
if
len
(
tag_topic_id_list
)
>
index
:
for
topic_id
in
tag_topic_id_list
[
index
:]:
if
topic_id
not
in
have_read_topic_id_list
and
topic_id
not
in
recommend_topic_id_list
:
recommend_topic_id_list
.
append
(
topic_id
)
break
redis_register_tag_topic_data
=
{
"data"
:
json
.
dumps
(
recommend_topic_id_list
),
"cursor"
:
0
}
redis_client
.
hmset
(
cls
.
linucb_user_id_register_tag_topic_id_prefix
,
redis_register_tag_topic_data
)
redis_client
.
expire
(
cls
.
linucb_user_id_register_tag_topic_id_prefix
,
30
*
24
*
60
*
60
)
if
user_id
not
in
user_id_dict
:
user_id_dict
[
user_id
]
=
set
()
user_id_dict
[
user_id
]
.
add
(
tag_id
)
topic_recommend_redis_key
=
cls
.
linucb_user_id_recommend_topic_id_prefix
+
str
(
user_id
)
redis_data_dict
=
{
"data"
:
json
.
dumps
(
recommend_topic_id_list
),
"cursor"
:
0
}
redis_client
.
hmset
(
topic_recommend_redis_key
,
redis_data_dict
)
redis_client
.
expire
(
topic_recommend_redis_key
,
30
*
24
*
60
*
60
)
for
user_id
in
user_id_dict
:
redis_client
.
hset
(
cls
.
linucb_register_user_tag_key
,
user_id
,
json
.
dumps
(
list
(
user_id_dict
[
user_id
])))
# if user_id not in user_id_set:
# user_id_set.add(user_id)
#
# user_tag_list = AccountUserTag.objects.filter(user=user_id).values_list("tag_id",flat=True)
#
# have_read_topic_id_list = Tools.get_have_read_topic_id_list(-1, user_id,
# TopicPageType.HOME_RECOMMEND)
# recommend_topic_id_list = list()
# cycle_num = int(10000/len(user_tag_list))
# for index in range(0,cycle_num):
# for tag_id in user_tag_list:
# redis_tag_id_key = cls.tag_topic_id_redis_prefix + str(tag_id)
# redis_tag_id_data = redis_client.get(redis_tag_id_key)
# tag_topic_id_list = json.loads(redis_tag_id_data) if redis_tag_id_data else []
# if not redis_tag_id_data:
# tag_topic_id_list = ESPerform.get_tag_topic_list(tag_id)
# redis_client.set(redis_tag_id_key,json.dumps(tag_topic_id_list))
# redis_client.expire(redis_tag_id_key,1*24*60*60)
#
# if len(tag_topic_id_list)>index:
# for topic_id in tag_topic_id_list[index:]:
# if topic_id not in have_read_topic_id_list and topic_id not in recommend_topic_id_list:
# recommend_topic_id_list.append(topic_id)
# break
#
# redis_register_tag_topic_data = {
# "data": json.dumps(recommend_topic_id_list),
# "cursor": 0
# }
# redis_client.hmset(cls.linucb_user_id_register_tag_topic_id_prefix,redis_register_tag_topic_data)
# redis_client.expire(cls.linucb_user_id_register_tag_topic_id_prefix,30*24*60*60)
#
# topic_recommend_redis_key = cls.linucb_user_id_recommend_topic_id_prefix + str(user_id)
# redis_data_dict = {
# "data": json.dumps(recommend_topic_id_list),
# "cursor":0
# }
# redis_client.hmset(topic_recommend_redis_key,redis_data_dict)
# redis_client.expire(topic_recommend_redis_key,30*24*60*60)
#
except
:
logging
.
error
(
"catch exception,err_msg:
%
s"
%
traceback
.
format_exc
())
linucb/views/collect_data.py
View file @
a5ccccd8
...
...
@@ -50,21 +50,21 @@ class CollectData(object):
logging
.
error
(
"catch exception,err_msg:
%
s"
%
traceback
.
format_exc
())
return
dict
()
def
update_recommend_tag_list
(
self
,
device_id
,
user_feature
=
None
):
def
update_recommend_tag_list
(
self
,
device_id
,
user_feature
=
None
,
user_id
=
None
):
try
:
recommend_tag_set
=
set
()
recommend_tag_list
=
list
()
recommend_tag_dict
=
dict
()
redis_linucb_tag_data_dict
=
self
.
_get_user_linucb_info
(
device_id
)
if
len
(
redis_linucb_tag_data_dict
)
==
0
:
recommend_tag_list
=
LinUCB
.
get_default_tag_list
()
recommend_tag_list
=
LinUCB
.
get_default_tag_list
(
user_id
)
LinUCB
.
init_device_id_linucb_info
(
redis_client
,
self
.
linucb_matrix_redis_prefix
,
device_id
,
recommend_tag_list
)
else
:
user_feature
=
user_feature
if
user_feature
else
self
.
user_feature
(
recommend_tag_dict
,
recommend_tag_set
)
=
LinUCB
.
linucb_recommend_tag
(
device_id
,
redis_linucb_tag_data_dict
,
user_feature
,
list
(
redis_linucb_tag_data_dict
.
keys
()))
recommend_tag_list
=
list
(
recommend_tag_dict
.
keys
())
if
len
(
recommend_tag_dict
)
>
0
:
recommend_tag_list
=
list
(
recommend_tag_set
)
if
len
(
recommend_tag_list
)
>
0
:
tag_recommend_redis_key
=
self
.
linucb_recommend_redis_prefix
+
str
(
device_id
)
redis_client
.
set
(
tag_recommend_redis_key
,
json
.
dumps
(
recommend_tag_list
))
# Todo:设置过期时间,调研set是否支持
...
...
@@ -131,6 +131,7 @@ class CollectData(object):
if
"type"
in
raw_val_dict
and
"on_click_feed_topic_card"
==
raw_val_dict
[
"type"
]:
topic_id
=
raw_val_dict
[
"params"
][
"business_id"
]
or
raw_val_dict
[
"params"
][
"topic_id"
]
device_id
=
raw_val_dict
[
"device"
][
"device_id"
]
user_id
=
raw_val_dict
[
"user_id"
]
if
"user_id"
in
raw_val_dict
else
None
logging
.
info
(
"consume topic_id:
%
s,device_id:
%
s"
%
(
str
(
topic_id
),
str
(
device_id
)))
...
...
@@ -151,7 +152,7 @@ class CollectData(object):
self
.
update_user_linucb_tag_info
(
reward
,
device_id
,
tag_id
,
user_feature
)
# 更新该用户的推荐tag数据,放在 更新完成user tag行为信息之后
self
.
update_recommend_tag_list
(
device_id
,
user_feature
)
self
.
update_recommend_tag_list
(
device_id
,
user_feature
,
user_id
)
elif
"type"
in
raw_val_dict
and
"page_precise_exposure"
==
raw_val_dict
[
"type"
]:
if
isinstance
(
raw_val_dict
[
"params"
][
"exposure_cards"
],
str
):
exposure_cards_list
=
json
.
loads
(
raw_val_dict
[
"params"
][
"exposure_cards"
])
...
...
@@ -160,6 +161,7 @@ class CollectData(object):
else
:
exposure_cards_list
=
list
()
device_id
=
raw_val_dict
[
"device"
][
"device_id"
]
user_id
=
raw_val_dict
[
"user_id"
]
if
"user_id"
in
raw_val_dict
else
None
exposure_topic_id_list
=
list
()
for
item
in
exposure_cards_list
:
...
...
@@ -193,7 +195,7 @@ class CollectData(object):
self
.
update_user_linucb_tag_info
(
reward
,
device_id
,
tag_id
,
user_feature
)
# 更新该用户的推荐tag数据,放在 更新完成user tag行为信息之后
self
.
update_recommend_tag_list
(
device_id
,
user_feature
)
self
.
update_recommend_tag_list
(
device_id
,
user_feature
,
user_id
)
else
:
logging
.
warning
(
"unknown type msg:
%
s"
%
raw_val_dict
.
get
(
"type"
,
"missing type"
))
except
:
...
...
linucb/views/linucb.py
View file @
a5ccccd8
...
...
@@ -20,11 +20,16 @@ class LinUCB:
default_tag_list
=
list
()
@classmethod
def
get_default_tag_list
(
cls
):
def
get_default_tag_list
(
cls
,
user_id
):
try
:
if
len
(
cls
.
default_tag_list
)
==
0
:
cls
.
default_tag_list
=
Tag
.
objects
.
using
(
settings
.
SLAVE_DB_NAME
)
.
filter
(
is_online
=
True
,
collection
=
1
)
.
values_list
(
"id"
,
flat
=
True
)[
0
:
100
]
if
user_id
:
redis_tag_data
=
redis_client
.
hget
(
"physical:linucb:register_user_tag_info"
,
user_id
)
cls
.
default_tag_list
=
json
.
loads
(
redis_tag_data
)
if
redis_tag_data
else
[]
if
len
(
cls
.
default_tag_list
)
==
0
:
cls
.
default_tag_list
=
Tag
.
objects
.
using
(
settings
.
SLAVE_DB_NAME
)
.
filter
(
is_online
=
True
,
collection
=
1
)
.
values_list
(
"id"
,
flat
=
True
)[
0
:
100
]
return
cls
.
default_tag_list
except
:
...
...
trans2es/models/pictorial.py
View file @
a5ccccd8
...
...
@@ -80,7 +80,7 @@ class Pictorial(models.Model):
for
topic_id
in
topic_id_list
:
topic_id_object
=
Topic
.
objects
.
filter
(
id
=
int
(
topic_id
))
.
first
()
if
topic_id_object
and
topic_id_object
.
is_online
and
topic_id_object
.
content_level
in
[
0
,
3
,
4
,
5
]:
if
topic_id_object
and
topic_id_object
.
is_online
and
int
(
topic_id_object
.
content_level
)
in
[
0
,
3
,
4
,
5
]:
effective_num
+=
1
if
effective_num
>=
5
:
ret
=
True
...
...
trans2es/models/topic.py
View file @
a5ccccd8
...
...
@@ -197,14 +197,14 @@ class Topic(models.Model):
elif
self
.
content_level
==
'3'
:
offline_score
+=
2.0
exposure_count
=
ActionSumAboutTopic
.
objects
.
using
(
settings
.
SLAVE_DB_NAME
)
.
filter
(
topic_id
=
self
.
id
,
data_type
=
1
)
.
count
()
click_count
=
ActionSumAboutTopic
.
objects
.
using
(
settings
.
SLAVE_DB_NAME
)
.
filter
(
topic_id
=
self
.
id
,
data_type
=
2
)
.
count
()
uv_num
=
ActionSumAboutTopic
.
objects
.
using
(
settings
.
SLAVE_DB_NAME
)
.
filter
(
topic_id
=
self
.
id
,
data_type
=
3
)
.
count
()
if
exposure_count
>
0
:
offline_score
+=
click_count
/
exposure_count
if
uv_num
>
0
:
offline_score
+=
(
self
.
vote_num
/
uv_num
+
self
.
reply_num
/
uv_num
)
#
exposure_count = ActionSumAboutTopic.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=self.id, data_type=1).count()
#
click_count = ActionSumAboutTopic.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=self.id, data_type=2).count()
#
uv_num = ActionSumAboutTopic.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=self.id, data_type=3).count()
#
#
if exposure_count > 0:
#
offline_score += click_count / exposure_count
#
if uv_num > 0:
#
offline_score += (self.vote_num / uv_num + self.reply_num / uv_num)
"""
1:马甲账号是否对总分降权?
...
...
@@ -247,7 +247,6 @@ class PictorialTopic(models.Model):
pictorial_id
=
models
.
BigIntegerField
(
verbose_name
=
u'画报ID'
)
topic_id
=
models
.
BigIntegerField
(
verbose_name
=
u'帖子ID'
)
is_online
=
models
.
BooleanField
(
verbose_name
=
u"是否有效"
,
default
=
True
)
is_online
=
models
.
BooleanField
(
verbose_name
=
u'是否上线'
)
is_deleted
=
models
.
BooleanField
(
verbose_name
=
u'是否删除'
)
...
...
trans2es/utils/pictorial_transfer.py
View file @
a5ccccd8
...
...
@@ -37,7 +37,7 @@ class PictorialTransfer(object):
res
[
"tag_id"
]
=
tag_id
res
[
"tag_name"
]
=
instance
.
get_tag_by_name
(
tag_id
)
res
[
"topic_id_list"
]
=
instance
.
get_topic_id
()
#
res["effective"] = instance.get_effective(res["topic_id_list"])
res
[
"effective"
]
=
instance
.
get_effective
(
res
[
"topic_id_list"
])
return
res
except
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment