Commit 699d6624 authored by 段英荣

调试

parent 926cf800
...@@ -7,7 +7,10 @@ from __future__ import print_function ...@@ -7,7 +7,10 @@ from __future__ import print_function
import numpy as np import numpy as np
import redis import redis
# from libs.cache import redis_client import sys
sys.path.append("/home/gmuser/gm_mab/")
from libs.cache import redis_client
import logging import logging
import traceback import traceback
import json import json
...@@ -17,9 +20,6 @@ import random ...@@ -17,9 +20,6 @@ import random
import time import time
redis_url = "redis://:ReDis!GmTx*0aN9@172.16.40.173:6379"
redis_client = redis.StrictRedis.from_url(redis_url)
class LinUCB: class LinUCB:
d = 6 d = 6
alpha = 0.01 alpha = 0.01
......
...@@ -6,8 +6,10 @@ from __future__ import division ...@@ -6,8 +6,10 @@ from __future__ import division
from __future__ import print_function from __future__ import print_function
import numpy as np import numpy as np
import redis import sys
# from libs.cache import redis_client sys.path.append("/home/gmuser/gm_mab/")
from libs.cache import redis_client
import logging import logging
import traceback import traceback
import json import json
...@@ -17,7 +19,7 @@ import random ...@@ -17,7 +19,7 @@ import random
import time import time
import pymysql import pymysql
from sklearn.preprocessing import LabelEncoder from sklearn.preprocessing import LabelEncoder
from elasticsearch import Elasticsearch
class Generate_Feature_Info(object): class Generate_Feature_Info(object):
...@@ -27,8 +29,9 @@ class Generate_Feature_Info(object): ...@@ -27,8 +29,9 @@ class Generate_Feature_Info(object):
password = "BJQaT9VzDcuPBqkd" password = "BJQaT9VzDcuPBqkd"
database = "zhengxing" database = "zhengxing"
# 新项目标签encode对象 redis_name_content_linucb_feature_prefix = "strategy:linucb:feature:content_type:"
tagv3_label_encoder = None redis_name_user_linucb_feature_prefix = "strategy:linucb:feature:user"
@classmethod @classmethod
def get_tagv3_word_list(cls): def get_tagv3_word_list(cls):
...@@ -67,8 +70,94 @@ class Generate_Feature_Info(object): ...@@ -67,8 +70,94 @@ class Generate_Feature_Info(object):
logging.error("catch exception,err_msg:%s" % traceback.format_exc()) logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return None return None
@classmethod
def generate_content_feature_to_redis(cls,label_encoder,content_type="diary"):
    """Scan online diaries in Elasticsearch and cache their LinUCB features in Redis.

    Each matching diary (``is_online`` true, ``content_level`` 5 or 6) is stored
    as a JSON list ``[tag_label, content_level]`` in the Redis hash named
    ``redis_name_content_linucb_feature_prefix + content_type``, keyed by the
    diary id.

    :param label_encoder: fitted encoder exposing ``transform(list) -> sequence``;
        used to turn the first ``tags_v3`` entry into an integer label.
    :param content_type: suffix of the Redis hash name. The Elasticsearch index
        and doc type are fixed to the diary index regardless of this value.
    :return: None. Failures are logged, never raised.
    """
    try:
        redis_name_content_linucb_feature = cls.redis_name_content_linucb_feature_prefix + content_type

        es = Elasticsearch([
            {
                'host': '172.16.31.17',
                'port': 9000,
            }
        ])
        page = es.search(
            index='gm-dbmw-diary-read',
            doc_type='diary',
            scroll='10m',
            search_type='scan',
            size=10,
            body={
                "query": {
                    "filtered": {
                        "filter": {
                            "bool": {
                                "must": [
                                    {"term": {"is_online": True}},
                                    {"terms": {"content_level": [5,6]}}
                                ]
                            }
                        }
                    }
                },
                "_source": {"include": ["id", "tags_v3", "content_level"]}
            }
        )

        sid = page['_scroll_id']
        scroll_size = page['hits']['total']

        while scroll_size > 0:
            try:
                page = es.scroll(scroll_id=sid, scroll='10m')
                sid = page['_scroll_id']
                hits = page['hits']['hits']
                scroll_size = len(hits)
            except Exception:
                # A failed scroll leaves scroll_size unchanged; retrying the
                # same call forever would never terminate, so stop scanning.
                logging.error("catch exception,err_msg:%s" % traceback.format_exc())
                break

            for item in hits:
                # Handle each diary independently so that one bad document
                # does not discard the remainder of its page.
                try:
                    source = item["_source"]
                    diary_id = source["id"]
                    tags_v3 = source.get("tags_v3") or []
                    content_level = source["content_level"] if source["content_level"] else -1

                    offi_tags_v3 = [tag_item.encode("utf-8") for tag_item in tags_v3]

                    tag_label = -1
                    if offi_tags_v3:
                        # Only the first tag's label is used, so only that tag
                        # is encoded; int() turns the numpy scalar into a value
                        # json.dumps can always serialise.
                        tag_label = int(label_encoder.transform(offi_tags_v3[:1])[0])

                    diary_feature_list = [tag_label,content_level]
                    redis_client.hset(redis_name_content_linucb_feature,diary_id,json.dumps(diary_feature_list))
                except Exception:
                    logging.error("catch exception,err_msg:%s" % traceback.format_exc())
    except Exception:
        logging.error("catch exception,err_msg:%s" % traceback.format_exc())
@classmethod
def _get_user_portrait_tag3_redis_key(cls,device_id):
return "doris:user_portrait:tag3:device_id:" + str(device_id)
@classmethod
def generate_user_feature_to_redis(cls,device_id,label_encoder):
    """Compute the LinUCB feature of one device and cache it in Redis.

    The portrait JSON is read from the key built by
    ``_get_user_portrait_tag3_redis_key``. The entry of its ``"projects"``
    mapping with the highest value is encoded with ``label_encoder``, and
    ``[user_tag_label]`` is written as JSON into the hash
    ``redis_name_user_linucb_feature_prefix`` under ``device_id``. When no
    portrait or no project entry exists, the label is -1.

    :param device_id: device identifier; used for the portrait key and as the hash field.
    :param label_encoder: fitted encoder exposing ``transform(list) -> sequence``.
    :return: None. Failures are logged, never raised.
    """
    try:
        redis_name_user_linucb_feature = cls.redis_name_user_linucb_feature_prefix

        user_portrait_redis_name = cls._get_user_portrait_tag3_redis_key(device_id)
        user_portrait_redis_data = redis_client.get(user_portrait_redis_name)
        user_portrait_dict = json.loads(user_portrait_redis_data) if user_portrait_redis_data else dict()

        # A portrait without (or with an empty) "projects" mapping falls back
        # to the default label instead of raising.
        projects = user_portrait_dict.get("projects") or {}

        user_tag_label = -1
        if projects:
            # max() returns the first highest-valued entry, the same element a
            # stable descending sort would place first.
            top_tag = max(projects.items(), key=lambda kv: kv[1])[0]
            # transform() expects a 1-D sequence of labels, not a bare string;
            # int() makes the numpy scalar JSON-serialisable.
            # NOTE(review): diary tags are utf-8 encoded before transform in
            # generate_content_feature_to_redis but not here — confirm which
            # form the encoder was fitted on.
            user_tag_label = int(label_encoder.transform([top_tag])[0])

        user_feature_list = [user_tag_label]
        redis_client.hset(redis_name_user_linucb_feature,device_id,json.dumps(user_feature_list))
    except Exception:
        logging.error("catch exception,err_msg:%s" % traceback.format_exc())
...@@ -79,6 +168,8 @@ if __name__ == "__main__": ...@@ -79,6 +168,8 @@ if __name__ == "__main__":
label_encoder = Generate_Feature_Info.get_tagv3_label_encode(tagv3_name_list=list(tagv3_name_set)) label_encoder = Generate_Feature_Info.get_tagv3_label_encode(tagv3_name_list=list(tagv3_name_set))
test_val = "切开双眼皮" device_id="868771031984211"
Generate_Feature_Info.generate_user_feature_to_redis(device_id,label_encoder)
label_encoder.transform(test_val) # test_val_list = ["切开双眼皮"]
#
# label_results_list = label_encoder.transform(test_val_list)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment