Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
P
physical
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
alpha
physical
Commits
4f420ce0
Commit
4f420ce0
authored
Jul 03, 2019
by
lixiaofang
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'master' into vote_topic
parents
230e12fb
7be80ee9
Hide whitespace changes
Inline
Side-by-side
Showing
11 changed files
with
145 additions
and
85 deletions
+145
-85
tasks.py
injection/data_sync/tasks.py
+10
-4
collect_data.py
linucb/views/collect_data.py
+69
-56
search_hotword.py
search/views/search_hotword.py
+13
-17
sl_user_login_status.sql
sqls/deploy/sl_user_login_status.sql
+16
-0
sl_user_login_status.sql
sqls/revert/sl_user_login_status.sql
+7
-0
sqitch.conf
sqls/sqitch.conf
+8
-0
sqitch.plan
sqls/sqitch.plan
+5
-0
sl_user_login_status.sql
sqls/verify/sl_user_login_status.sql
+8
-0
pictorial.json
trans2es/mapping/pictorial.json
+3
-3
pictorial.py
trans2es/models/pictorial.py
+1
-1
tag.py
trans2es/models/tag.py
+5
-4
No files found.
injection/data_sync/tasks.py
View file @
4f420ce0
...
...
@@ -15,7 +15,7 @@ from libs.es import ESPerform
from
libs.cache
import
redis_client
from
trans2es.models.face_user_contrast_similar
import
FaceUserContrastSimilar
,
UserSimilarScore
from
linucb.utils.register_user_tag
import
RegisterUserTag
from
trans2es.models.tag
import
RegisterShowTa
g
,
Tag
from
trans2es.models.tag
import
SettingsConfi
g
,
Tag
@shared_task
...
...
@@ -113,16 +113,22 @@ def sync_user_similar_score():
def
get_tag_count
():
try
:
# 获取搜索推荐热词
results_registr_tag
=
list
(
set
(
RegisterShowTag
.
objects
.
filter
(
is_deleted
=
False
,
is_online
=
1
)
.
values_list
(
"tag_id"
,
flat
=
True
)))
# results_registr_tag = list(set(SettingsConfig.objects.filter(is_deleted=False).values_list("val", flat=True)))
# tag_val_list =set()
# for item in results_registr_tag:
# for word in item.split():
# tag_val_list.add(word)
# 获取符合条件的核心词
results_tag
=
list
(
set
(
Tag
.
objects
.
filter
(
is_online
=
True
,
is_deleted
=
False
,
collection
=
1
)
.
values_list
(
"id"
,
flat
=
True
)))
results_tag
=
list
(
set
(
Tag
.
objects
.
filter
(
is_online
=
True
,
is_deleted
=
False
,
collection
=
1
)
.
values_list
(
"id"
,
flat
=
True
)))
redis_registr_tag
=
"physical:search_hotword:results_registr_tag"
redis_tag
=
"physical:search_hotword:results_tag"
redis_client
.
set
(
redis_registr_tag
,
list
(
results_registr_tag
))
#
redis_client.set(redis_registr_tag, list(results_registr_tag))
redis_client
.
set
(
redis_tag
,
list
(
results_tag
))
except
:
logging
.
error
(
"catch exception,err_msg:
%
s"
%
traceback
.
format_exc
())
linucb/views/collect_data.py
View file @
4f420ce0
...
...
@@ -164,10 +164,23 @@ class CollectData(object):
user_id
=
raw_val_dict
[
"user_id"
]
if
"user_id"
in
raw_val_dict
else
None
logging
.
info
(
"consume topic_id:
%
s,device_id:
%
s"
%
(
str
(
topic_id
),
str
(
device_id
)))
topic_tag_list
=
list
(
TopicTag
.
objects
.
using
(
settings
.
SLAVE_DB_NAME
)
.
filter
(
topic_id
=
topic_id
,
is_online
=
True
)
.
values_list
(
"tag_id"
,
flat
=
True
))
tag_query_results
=
Tag
.
objects
.
using
(
settings
.
SLAVE_DB_NAME
)
.
filter
(
id__in
=
topic_tag_list
,
is_online
=
True
,
is_deleted
=
False
)
.
values_list
(
"id"
,
"collection"
,
"is_ai"
)
for
id
,
collection
,
is_ai
in
tag_query_results
:
if
collection
and
is_ai
:
# topic_tag_list = list(TopicTag.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=topic_id,is_online=True).values_list("tag_id",flat=True))
# tag_query_results = Tag.objects.using(settings.SLAVE_DB_NAME).filter(id__in=topic_tag_list,is_online=True,is_deleted=False).values_list("id","collection","is_ai")
# for id,collection,is_ai in tag_query_results:
# if collection and is_ai:
# click_topic_tag_list.append(id)
topic_tag_list
=
list
()
click_results
=
TopicTag
.
objects
.
using
(
settings
.
SLAVE_DB_NAME
)
.
filter
(
topic_id
=
topic_id
,
is_online
=
True
)
.
values_list
(
"tag_id"
,
"is_collection"
)
for
tag_id
,
is_collection
in
click_results
:
topic_tag_list
.
append
(
tag_id
)
if
is_collection
:
click_topic_tag_list
.
append
(
tag_id
)
tag_query_results
=
Tag
.
objects
.
using
(
settings
.
SLAVE_DB_NAME
)
.
filter
(
id__in
=
topic_tag_list
,
is_online
=
True
,
is_deleted
=
False
)
.
values_list
(
"id"
,
"is_ai"
)
for
id
,
is_ai
in
tag_query_results
:
if
is_ai
:
click_topic_tag_list
.
append
(
id
)
logging
.
info
(
"positive tag_list,device_id:
%
s,topic_id:
%
s,tag_list:
%
s"
%
(
...
...
@@ -197,58 +210,58 @@ class CollectData(object):
# 更新该用户的推荐tag数据,放在 更新完成user tag行为信息之后
if
len
(
click_topic_tag_list
)
>
0
:
self
.
update_recommend_tag_list
(
device_id
,
user_feature
,
user_id
,
click_topic_tag_list
=
click_topic_tag_list
)
elif
"type"
in
raw_val_dict
and
"page_precise_exposure"
==
raw_val_dict
[
"type"
]:
if
isinstance
(
raw_val_dict
[
"params"
][
"exposure_cards"
],
str
):
exposure_cards_list
=
json
.
loads
(
raw_val_dict
[
"params"
][
"exposure_cards"
])
elif
isinstance
(
raw_val_dict
[
"params"
][
"exposure_cards"
],
list
):
exposure_cards_list
=
raw_val_dict
[
"params"
][
"exposure_cards"
]
else
:
exposure_cards_list
=
list
()
device_id
=
raw_val_dict
[
"device"
][
"device_id"
]
user_id
=
raw_val_dict
[
"user_id"
]
if
"user_id"
in
raw_val_dict
else
None
logging
.
warning
(
"type msg:
%
s"
%
raw_val_dict
.
get
(
"type"
))
exposure_topic_id_list
=
list
()
for
item
in
exposure_cards_list
:
if
"card_id"
not
in
item
:
continue
exposure_topic_id
=
item
[
"card_id"
]
logging
.
info
(
"consume exposure topic_id:
%
s,device_id:
%
s"
%
(
str
(
exposure_topic_id
),
str
(
device_id
)))
if
exposure_topic_id
:
exposure_topic_id_list
.
append
(
exposure_topic_id
)
topic_tag_id_dict
=
dict
()
tag_list
=
list
()
exposure_sql_query_results
=
TopicTag
.
objects
.
using
(
settings
.
SLAVE_DB_NAME
)
.
\
filter
(
topic_id__in
=
exposure_topic_id_list
)
.
\
values_list
(
"topic_id"
,
"tag_id"
,
"is_online"
,
"is_collection"
)
# if len(exposure_sql_query_results)>0:
for
topic_id
,
tag_id
,
is_online
,
is_collection
in
exposure_sql_query_results
:
if
is_online
and
is_collection
==
1
:
tag_list
.
append
(
tag_id
)
if
is_online
:
tag_sql_query_results
=
Tag
.
objects
.
using
(
settings
.
SLAVE_DB_NAME
)
.
filter
(
id
=
tag_id
)
.
values_list
(
"id"
,
"collection"
,
"is_ai"
)
for
id
,
collection
,
is_ai
in
tag_sql_query_results
:
if
(
is_ai
==
1
)
and
id
not
in
tag_list
:
tag_list
.
append
(
id
)
if
topic_id
not
in
topic_tag_id_dict
:
topic_tag_id_dict
[
topic_id
]
=
list
()
topic_tag_id_dict
[
topic_id
]
.
append
(
tag_id
)
is_click
=
0
is_vote
=
0
reward
=
1
if
is_click
or
is_vote
else
0
logging
.
info
(
"negative tag_list,device_id:
%
s,topic_tag_id_dict:
%
s"
%
(
str
(
device_id
),
str
(
topic_tag_id_dict
)))
for
tag_id
in
tag_list
:
self
.
update_user_linucb_tag_info
(
reward
,
device_id
,
tag_id
,
user_feature
)
# 更新该用户的推荐tag数据,放在 更新完成user tag行为信息之后
self
.
update_recommend_tag_list
(
device_id
,
user_feature
,
user_id
)
#
elif "type" in raw_val_dict and "page_precise_exposure" == raw_val_dict["type"]:
#
if isinstance(raw_val_dict["params"]["exposure_cards"],str):
#
exposure_cards_list = json.loads(raw_val_dict["params"]["exposure_cards"])
#
elif isinstance(raw_val_dict["params"]["exposure_cards"],list):
#
exposure_cards_list = raw_val_dict["params"]["exposure_cards"]
#
else:
#
exposure_cards_list = list()
#
device_id = raw_val_dict["device"]["device_id"]
#
user_id = raw_val_dict["user_id"] if "user_id" in raw_val_dict else None
#
logging.warning("type msg:%s" % raw_val_dict.get("type"))
#
exposure_topic_id_list = list()
#
for item in exposure_cards_list:
#
if "card_id" not in item:
#
continue
#
exposure_topic_id = item["card_id"]
#
logging.info(
#
"consume exposure topic_id:%s,device_id:%s" % (str(exposure_topic_id), str(device_id)))
#
if exposure_topic_id:
#
exposure_topic_id_list.append(exposure_topic_id)
#
#
topic_tag_id_dict = dict()
#
tag_list = list()
#
exposure_sql_query_results = TopicTag.objects.using(settings.SLAVE_DB_NAME).\
#
filter(topic_id__in=exposure_topic_id_list).\
#
values_list("topic_id","tag_id","is_online","is_collection")
#
# if len(exposure_sql_query_results)>0:
#
for topic_id,tag_id,is_online,is_collection in exposure_sql_query_results:
#
if is_online and is_collection == 1:
#
tag_list.append(tag_id)
#
if is_online:
#
tag_sql_query_results = Tag.objects.using(settings.SLAVE_DB_NAME).filter(
#
id=tag_id).values_list("id", "collection", "is_ai")
#
for id, collection, is_ai in tag_sql_query_results:
#
if (is_ai == 1) and id not in tag_list:
#
tag_list.append(id)
#
#
if topic_id not in topic_tag_id_dict:
#
topic_tag_id_dict[topic_id] = list()
#
topic_tag_id_dict[topic_id].append(tag_id)
#
#
is_click = 0
#
is_vote = 0
#
#
reward = 1 if is_click or is_vote else 0
#
#
logging.info("negative tag_list,device_id:%s,topic_tag_id_dict:%s" % (
#
str(device_id), str(topic_tag_id_dict)))
#
for tag_id in tag_list:
#
self.update_user_linucb_tag_info(reward, device_id, tag_id, user_feature)
#
#
# 更新该用户的推荐tag数据,放在 更新完成user tag行为信息之后
#
self.update_recommend_tag_list(device_id, user_feature, user_id)
elif
"type"
in
raw_val_dict
and
"interest_choice_click_next"
==
raw_val_dict
[
"type"
]:
if
isinstance
(
raw_val_dict
[
"params"
][
"tagid_list"
],
str
):
tagid_list
=
json
.
loads
(
raw_val_dict
[
"params"
][
"tagid_list"
])
...
...
search/views/search_hotword.py
View file @
4f420ce0
...
...
@@ -12,7 +12,7 @@ from search.utils.group import GroupUtils
from
search.utils.common
import
GroupSortTypes
from
libs.es
import
ESPerform
from
trans2es.models.pictorial
import
PictorialTopics
from
trans2es.models.tag
import
RegisterShowTa
g
,
Tag
from
trans2es.models.tag
import
SettingsConfi
g
,
Tag
from
libs.cache
import
redis_client
...
...
@@ -33,25 +33,20 @@ def search_hotword(device_id=-1):
"""
try
:
all_tag_name_list
=
set
()
results_registr_tag
=
json
.
loads
(
redis_client
.
get
(
"physical:search_hotword:results_registr_tag"
))
#
results_registr_tag = json.loads(redis_client.get("physical:search_hotword:results_registr_tag"))
results_tag
=
json
.
loads
(
redis_client
.
get
(
"physical:search_hotword:results_tag"
))
# 先获取搜索推荐热词
for
num
in
range
(
0
,
len
(
results_registr_tag
)
-
1
):
tag_id
=
random
.
randint
(
0
,
len
(
results_registr_tag
)
-
1
)
results_tag_chose
=
list
(
set
(
RegisterShowTag
.
objects
.
filter
(
tag_id
=
results_registr_tag
[
tag_id
],
is_online
=
True
)
.
values_list
(
"tag_id"
,
flat
=
True
)))
if
results_tag_chose
:
results_tag_recommend
=
list
(
set
(
Tag
.
objects
.
filter
(
id
=
results_tag_chose
[
0
],
is_online
=
True
)
.
values_list
(
"name"
,
flat
=
True
)))
if
results_tag_recommend
:
all_tag_name_list
.
add
(
results_tag_recommend
[
0
])
if
len
(
all_tag_name_list
)
==
6
or
num
==
results_tag
:
break
results_registr_tag
=
list
(
set
(
SettingsConfig
.
objects
.
filter
(
is_deleted
=
False
,
key
=
1
)
.
values_list
(
"val"
,
flat
=
True
)))
tag_val_list
=
set
()
for
item
in
results_registr_tag
:
for
word
in
item
.
split
():
tag_val_list
.
add
(
word
)
tag_id_list
=
random
.
sample
(
range
(
0
,
len
(
tag_val_list
)),
6
)
for
tag_id
in
tag_id_list
:
tag_val
=
list
(
tag_val_list
)[
tag_id
]
all_tag_name_list
.
add
(
tag_val
)
logging
.
info
(
"get all_tag_name_list:
%
s"
%
all_tag_name_list
)
# 获取个性化标签
linucb_recommend_redis_prefix
=
"physical:linucb:tag_recommend:device_id:"
tag_recommend_redis_key
=
linucb_recommend_redis_prefix
+
str
(
device_id
)
...
...
@@ -84,3 +79,4 @@ def search_hotword(device_id=-1):
except
:
logging
.
error
(
"catch exception,err_msg:
%
s"
%
traceback
.
format_exc
())
return
{
"recommend_tag_name"
:
[]}
sqls/deploy/sl_user_login_status.sql
0 → 100644
View file @
4f420ce0
-- Deploy flipr:sl_user_login_status to mysql
--
-- Creates the `sl_user_login_status` table: one row per `user_id` (primary
-- key) plus a secondary index `lv_day` on `last_visit_day`.
-- The table COMMENT literal reads "data-warehouse push table: user login
-- status table".
--
-- NOTE(review): in MySQL, CREATE TABLE triggers an implicit commit, so the
-- BEGIN/COMMIT pair below presumably does not make this DDL roll back on
-- failure -- confirm against the MySQL version in use.

BEGIN;

CREATE TABLE `sl_user_login_status` (
  `user_id` varchar(100) NOT NULL COMMENT '用户ID',            -- user ID
  `is_shadow` tinyint(1) NOT NULL COMMENT '是否是马甲用户',     -- whether this is a shadow ("sock-puppet") user
  `first_visit_day` date COMMENT '首次日期',                    -- first visit date (nullable)
  `last_visit_day` date COMMENT '最后一次登陆日期',             -- most recent login date (nullable)
  `day_id` varchar(10) NOT NULL COMMENT '数据账期',             -- data accounting period
  PRIMARY KEY (`user_id`),
  INDEX `lv_day` (`last_visit_day`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='数据仓库推送表用户登录状态表';

-- Template placeholder: any further DDL belonging to this change goes here.

COMMIT;
sqls/revert/sl_user_login_status.sql
0 → 100644
View file @
4f420ce0
-- Revert flipr:sl_user_login_status from mysql
--
-- Undoes the deploy script by dropping the `sl_user_login_status` table.

BEGIN;

DROP TABLE `sl_user_login_status`;

-- Template placeholder: any further revert DDL for this change goes here.

COMMIT;
sqls/sqitch.conf
0 → 100644
View file @
4f420ce0
[
core
]
engine
=
mysql
# plan_file = sqitch.plan
# top_dir = .
# [engine "mysql"]
# target = db:mysql:
# registry = sqitch
# client = /usr/local/mysql/bin/mysql
sqls/sqitch.plan
0 → 100644
View file @
4f420ce0
%syntax-version=1.0.0
%project=flipr
%uri=https://github.com/sqitchers/sqitch-mysql-intro/
sl_user_login_status 2019-06-25T11:06:15Z Lxrent <lxrent@lxrentdeMacBook-Pro.local> # 数据仓库推送表用户登录状态表
sqls/verify/sl_user_login_status.sql
0 → 100644
View file @
4f420ce0
-- Verify flipr:sl_user_login_status on mysql
--
-- Confirms that the table and every column created by the deploy script
-- exist. The statement fails if the table or any listed column is missing.
-- `WHERE FALSE` keeps the check from reading any rows, so verification stays
-- cheap however large the table grows.

BEGIN;

SELECT `user_id`,
       `is_shadow`,
       `first_visit_day`,
       `last_visit_day`,
       `day_id`
  FROM `sl_user_login_status`
 WHERE FALSE;

ROLLBACK;
trans2es/mapping/pictorial.json
View file @
4f420ce0
...
...
@@ -5,8 +5,8 @@
"is_online"
:{
"type"
:
"boolean"
},
//上线
"is_deleted"
:{
"type"
:
"boolean"
},
"is_recommend"
:{
"type"
:
"boolean"
},
"name"
:{
"type"
:
"text"
,
"analyzer"
:
"gm_default_index"
,
"search_analyzer"
:
"gm_default_
index
"
},
"description"
:{
"type"
:
"text"
,
"analyzer"
:
"gm_default_index"
,
"search_analyzer"
:
"gm_default_
index
"
},
"name"
:{
"type"
:
"text"
,
"analyzer"
:
"gm_default_index"
,
"search_analyzer"
:
"gm_default_
search
"
},
"description"
:{
"type"
:
"text"
,
"analyzer"
:
"gm_default_index"
,
"search_analyzer"
:
"gm_default_
search
"
},
"topic_num"
:{
"type"
:
"long"
},
"creator_id"
:{
"type"
:
"long"
},
"icon"
:{
"type"
:
"text"
},
...
...
@@ -14,7 +14,7 @@
"create_time"
:{
"type"
:
"date"
,
"format"
:
"date_time_no_millis"
},
"update_time"
:{
"type"
:
"date"
,
"format"
:
"date_time_no_millis"
},
"tag_id"
:{
"type"
:
"long"
},
"tag_name"
:{
"type"
:
"text"
,
"analyzer"
:
"gm_default_index"
,
"search_analyzer"
:
"gm_default_
index
"
},
"tag_name"
:{
"type"
:
"text"
,
"analyzer"
:
"gm_default_index"
,
"search_analyzer"
:
"gm_default_
search
"
},
"topic_id_list"
:{
"type"
:
"long"
},
"effective"
:{
"type"
:
"boolean"
},
"offline_score"
:{
"type"
:
"long"
},
...
...
trans2es/models/pictorial.py
View file @
4f420ce0
...
...
@@ -78,7 +78,7 @@ class Pictorial(models.Model):
for
topic_id
in
topic_id_list
:
topic_id_object
=
Topic
.
objects
.
filter
(
id
=
int
(
topic_id
))
.
first
()
if
topic_id_object
and
topic_id_object
.
is_online
and
int
(
topic_id_object
.
content_level
)
in
[
0
,
3
,
4
,
5
]:
if
topic_id_object
and
topic_id_object
.
is_online
and
int
(
topic_id_object
.
content_level
)
in
[
0
,
3
,
4
,
5
,
6
]:
effective_num
+=
1
if
effective_num
>=
5
:
ret
=
True
...
...
trans2es/models/tag.py
View file @
4f420ce0
...
...
@@ -96,13 +96,14 @@ class CommunityTagType(models.Model):
update_time
=
models
.
DateTimeField
(
verbose_name
=
u'更新时间'
,
default
=
datetime
.
datetime
.
fromtimestamp
(
0
))
class
RegisterShowTa
g
(
models
.
Model
):
class
SettingsConfi
g
(
models
.
Model
):
class
Meta
:
verbose_name
=
"搜索热词"
db_table
=
"
register_show_ta
g"
db_table
=
"
settingsconfi
g"
id
=
models
.
IntegerField
(
primary_key
=
True
,
verbose_name
=
u"主键ID"
)
is_deleted
=
models
.
IntegerField
(
verbose_name
=
u"是否删除"
)
is_online
=
models
.
IntegerField
(
verbose_name
=
u"是否在线"
)
tag_id
=
models
.
IntegerField
(
verbose_name
=
"标签ID"
)
key
=
models
.
IntegerField
(
verbose_name
=
u"值"
)
val
=
models
.
IntegerField
(
verbose_name
=
"标签内容"
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment