import pandas as pd
from elasticsearch import Elasticsearch as Es

# Elasticsearch nodes this script connects to.
ES_V2_HOSTS = (
    {"host": "172.16.31.17", "port": 9000},
    {"host": "172.16.31.11", "port": 9000},
    {"host": "172.16.31.13", "port": 9000},
)

# Prefix shared by every index name built in es_query().
INDEX_PREFIX = "gm-dbmw"

# Search terms the reports are generated for.  Some entries occur more than
# once (e.g. "瘦身", "溶解酶"); they are kept on purpose so that every report
# still contains one row per list entry, in this order.
TAGS = (
    "下颌角切除术", "M唇", "瘦身", "抽脂", "隆胸", "丰乳房", "孕睫术",
    "眶隔脂肪释放术", "小腿神经阻断术", "瘦脸针", "水光针", "光子嫩肤",
    "热玛吉", "瘦腿针", "超声刀", "面部吸脂", "瘦肩针", "皮秒", "果酸焕肤",
    "热拉提", "微针", "牙齿矫正", "超皮秒", "点阵激光", "植发", "小气泡",
    "双眼皮修复", "自体脂肪隆胸", "鼻翼缩小", "假体隆胸", "玻尿酸丰下巴",
    "埋线双眼皮", "纹眉", "颧骨内推", "拉皮", "玻尿酸隆鼻", "女性私密紧致",
    "嗨体", "溶脂针瘦脸", "黄金微针", "磨骨", "肋骨鼻", "洗牙", "植发际线",
    "光纤溶脂", "点痣", "下颌角切除", "切开双眼皮", "腰腹吸脂", "激光祛斑",
    "白瓷娃娃", "大腿吸脂", "假体下巴", "除皱针注射", "溶解酶", "吸脂瘦手臂",
    "微针祛痘坑", "厚唇改薄", "玻尿酸", "大分子玻尿酸", "耳软骨",
    "鼻中隔软骨", "肋软骨", "硅胶", "膨体", "假体", "自体真皮", "自体脂肪",
    "自体软骨", "自体血清", "溶解酶", "嗨体", "胶原蛋白", "双美胶原蛋白",
    "黄金", "药物", "生长因子", "肉毒素", "埋线提升", "悦升线", "蛋白线",
    "水杨酸", "果酸", "杏仁酸", "奥美定", "干细胞", "纳米树脂", "黑脸娃娃",
    "眼睑下至", "童颜针", "微笑唇", "减肥", "瘦身", "隆鼻", "V脸", "祛斑",
    "祛痣", "祛黑头", "祛疤", "祛痘", "溶脂", "吸脂", "嘟嘟唇", "丰唇",
    "丰下巴", "丰胸", "皮秒", "蜂巢皮秒", "超皮秒", "深蓝射频", "美瞳",
    "提眉", "纹眉", "孕睫", "瓷贴面", "全瓷牙", "美容冠", "黄金微雕", "微雕",
    "削骨", "截骨", "脂肪胶", "prp", "轮廓针", "水光针", "婴儿针",
    "三文鱼针", "少女针", "素颜针", "瘦脸针", "熊猫针", "瘦腿针", "小气泡",
    "正颌", "一针降颧骨", "脱毛", "近视", "面部提升", "嫩肤", "镭射净肤",
    "红蓝光", "二氧化碳点阵", "狐臭", "清洁", "补水", "内窥镜", "热立塑",
    "威塑", "优立塑", "酷塑", "调Q激光", "DPL", "染料激光", "体检", "产后",
    "正骨术", "隔空溶脂", "pst", "唇裂", "塑身", "微晶瓷", "ICL晶体植入",
    "全飞秒", "半飞秒", "根管治疗", "抗衰", "紧致", "飞梭雷射", "三点双眼皮",
    "颊脂垫", "嫩红", "眶隔脂肪释放", "针清", "美白", "冷光美白", "美白仓",
    "小腿神经阻断", "正畸", "变性", "干细胞疗法", "月光脱毛", "火凤凰溶脂",
    "微拉美", "剥落点阵", "非剥落点阵",
)

# multi_match settings shared by every query in this script.
QUERY_OPERATOR = "and"
QUERY_TYPE = "cross_fields"

ANSWER_FIELDS = ["title^1", "desc^1", "answer^1"]
QUESTION_FIELDS = ["title^1", "content^1", "tags^1"]
TOPIC_FIELDS = [
    "content^1",
    "author^1",
    "tractate_tag_name^1",
    "tractate_tag_name_content^1",
]

STAR_LEVELS = (3, 4, 5)


def es_index_adapt(index_prefix, doc_type, rw=None):
    """Build an index name of the form ``<prefix>-<doc_type>[-<rw>]``.

    Args:
        index_prefix: Leading part of the index name.
        doc_type: Document type, appended after the prefix.
        rw: ``None``, ``'read'`` or ``'write'``; when truthy it is appended
            as a third, dash-separated component.

    Returns:
        The dash-joined index name.

    Raises:
        AssertionError: If ``rw`` is not one of the accepted values.
    """
    # Kept as an assert so existing callers keep seeing AssertionError.
    assert rw in [None, 'read', 'write']
    parts = [index_prefix, doc_type]
    if rw:
        parts.append(rw)
    return '-'.join(parts)


def get_es(es_hosts_config=None):
    """Create an Elasticsearch client for the ``ES_V2_HOSTS`` nodes.

    Args:
        es_hosts_config: Accepted for compatibility; currently ignored, the
            client always connects to ``ES_V2_HOSTS``.

    Returns:
        A new client with sniffing disabled.
    """
    init_args = {
        # no sniffing
        'sniff_on_start': False,
        'sniff_on_connection_fail': False,
    }
    # Hand the client its own copies so the module constant is never shared.
    hosts = [dict(host) for host in ES_V2_HOSTS]
    return Es(hosts=hosts, **init_args)


def es_query(doc, body, offset, size, es=None):
    """Run a search against the read index of ``doc`` and return the total.

    Args:
        doc: Document type; also used to derive the index name.
        body: Search request body.
        offset: Value passed as ``from_``.
        size: Value passed as ``size``.
        es: Client to use.  A new one is created via ``get_es()`` when
            omitted, so pass a client when issuing many queries.

    Returns:
        ``res["hits"]["total"]`` of the search response.
    """
    if es is None:
        es = get_es()
    index = es_index_adapt(index_prefix=INDEX_PREFIX, doc_type=doc, rw='read')
    res = es.search(
        index=index,
        doc_type=doc,
        timeout='10s',
        body=body,
        from_=offset,
        size=size,
    )
    return res["hits"]["total"]


def _build_count_query(fields, query, term_filters):
    """Build the ``filtered``/``bool``/``must`` body used by every report.

    Args:
        fields: Fields for the ``multi_match`` clause.
        query: Text to match.
        term_filters: Ordered ``(field, value)`` pairs; each becomes a
            ``term`` clause appended after the ``multi_match`` clause.

    Returns:
        A new request-body dict.
    """
    must = [{
        "multi_match": {
            "fields": list(fields),
            "operator": QUERY_OPERATOR,
            "type": QUERY_TYPE,
            "query": query,
        }
    }]
    must.extend({"term": {name: value}} for name, value in term_filters)
    return {"query": {"filtered": {"filter": {"bool": {"must": must}}}}}


def _write_tag_report(doc, fields, column_filters, output_path, es=None):
    """Query one count per tag and column, then write the table as CSV.

    Args:
        doc: Document type handed to ``es_query``.
        fields: ``multi_match`` fields for this document type.
        column_filters: Ordered ``(column_name, term_filters)`` pairs; one
            query is issued per pair for every tag, in this order.
        output_path: Destination CSV path.
        es: Optional client; one is created and reused when omitted.
    """
    if es is None:
        # One client for the whole report instead of one per query.
        es = get_es()
    rows = []
    for tag in TAGS:
        row = [tag]
        for _, term_filters in column_filters:
            body = _build_count_query(fields, tag, term_filters)
            row.append(es_query(doc, body, 0, 1, es=es))
        rows.append(row)
    columns = ["tag"] + [name for name, _ in column_filters]
    df = pd.DataFrame(rows, columns=columns)
    df.to_csv(output_path, index=False, encoding="utf_8_sig")


def answer(output_path="/home/gmuser/tag_answer.csv", es=None):
    """Write per-tag answer counts by content level (all and video-only).

    Columns: ``tag``, ``star_3..5`` (online, given ``content_level``) and
    ``video_star_3..5`` (additionally ``content_type`` == 1).
    """
    column_filters = [
        ("star_%d" % level,
         [("is_online", True), ("content_level", level)])
        for level in STAR_LEVELS
    ] + [
        ("video_star_%d" % level,
         [("is_online", True), ("content_type", 1), ("content_level", level)])
        for level in STAR_LEVELS
    ]
    _write_tag_report('answer', ANSWER_FIELDS, column_filters, output_path, es)


def question(output_path="/home/gmuser/tag_question.csv", es=None):
    """Write per-tag question counts (all and video-only).

    Columns: ``tag``, ``number`` (online) and ``video_number`` (online and
    ``content_type`` == 1).
    """
    column_filters = [
        ("number", [("is_online", True)]),
        ("video_number", [("is_online", True), ("content_type", 1)]),
    ]
    _write_tag_report(
        'question', QUESTION_FIELDS, column_filters, output_path, es)


def _topic_filters(level, video=False):
    """Return the ordered term filters of a topic query for ``level``."""
    filters = [
        ("is_online", True),
        ("content_level", level),
        ("status", "3"),
    ]
    if video:
        filters.append(("is_video", True))
    return filters


def topic(output_path="/home/gmuser/tag_topic.csv", es=None):
    """Write per-tag topic counts by content level (all and video-only).

    Columns: ``tag``, ``star_3..5`` (online, ``status`` "3", given
    ``content_level``) and ``video_star_3..5`` (additionally ``is_video``).
    """
    column_filters = [
        ("star_%d" % level, _topic_filters(level))
        for level in STAR_LEVELS
    ] + [
        ("video_star_%d" % level, _topic_filters(level, video=True))
        for level in STAR_LEVELS
    ]
    _write_tag_report('tractate', TOPIC_FIELDS, column_filters, output_path, es)


def test_topic():
    """Print the total for a single level-3 topic query (manual check)."""
    body = _build_count_query(TOPIC_FIELDS, "眶隔脂肪释放术", _topic_filters(3))
    body["_source"] = {"include": "id"}
    print(es_query('tractate', body, 0, 10))


if __name__ == "__main__":
    topic()
    print("topic ok")
    question()
    print("ques ok")
    answer()
    print("answer ok")