get_keyword.py 1.89 KB
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import jieba
import jieba.analyse


class GetContentKeyWords(object):
    """Find configured tag, star and synonym words inside free text.

    The three word lists (``tag_list``, ``star_list``, ``synonym_tag_list``)
    are plain attributes and may be replaced or extended after construction.
    Before any segmentation the words are registered with the segmenter via
    ``add_word`` so that they can come out as single tokens.

    Results of :meth:`get_keywords` are published on ``tags``, ``stars`` and
    ``synonym_tags`` as comma-separated strings.
    """

    # Value passed as ``topK`` to ``extract_tags``; at most this many
    # keywords are inspected per call.
    _MAX_KEYWORDS = 9999

    def __init__(self):
        # NOTE: the attribute name is misspelled ("jeiba"); it is kept as is
        # because external code may read or replace it.
        self.jeiba = jieba
        self.tag_list = ["瘦脸针kyc", "双眼皮kyc", "水光针kyc", "玻尿酸kyc", "吸脂kyc", "祛痘kyc", "鼻综合kyc", "光子嫩肤kyc", "没有想法kyc",
                         "牙齿kyc", "抗衰紧致kyc", "胸部kyc", "果酸换肤kyc", "祛斑kyc", "美白针kyc", "瘦腿针kyc"]
        self.star_list = []
        self.synonym_tag_list = []
        self.tags = ""
        self.stars = ""
        self.synonym_tags = ""
        # Words this instance has already handed to ``add_word``.
        self._registered_words = set()

    def add_tag_word(self):
        """Register every configured word with the segmenter, once each.

        Words are added in list order: tags, then stars, then synonym tags.
        A word that this instance has already registered is skipped, so
        calling this method repeatedly (it runs before every ``get`` and
        ``get_keywords``) does not repeat the dictionary update. Words that
        were appended to the lists since the previous call are picked up.
        """
        segmenter = self.jeiba
        registered = self._registered_words

        for words in (self.tag_list, self.star_list, self.synonym_tag_list):
            # ``or ()`` lets a list that was set to None behave like an
            # empty one.
            for word in words or ():
                if word in registered:
                    continue
                segmenter.add_word(word)
                # Recorded only after a successful add so that a failed
                # registration is retried on the next call.
                registered.add(word)

    def get(self, content):
        """Segment ``content`` after registering the configured words.

        :param content: text to segment.
        :return: whatever ``self.jeiba.cut(content)`` returns.
        """
        self.add_tag_word()
        return self.jeiba.cut(content)

    def get_keywords(self, content, k=None):
        """Extract keywords from ``content`` and keep the configured ones.

        The keywords returned by ``extract_tags`` are filtered against the
        three word lists, preserving the order in which they were returned.
        The first ``k`` matches of each kind are joined with ``","`` and
        stored on ``self.tags``, ``self.stars`` and ``self.synonym_tags``.

        :param content: text to analyse.
        :param k: slice bound applied to each result list (``matches[:k]``);
            ``None`` keeps every match.
        :return: ``None``; read the results from the instance attributes.
        """
        self.add_tag_word()
        keywords = self.jeiba.analyse.extract_tags(
            content, topK=self._MAX_KEYWORDS, withWeight=True, allowPOS=())

        # Build the lookup sets once instead of scanning the lists for
        # every extracted keyword.
        tag_words = set(self.tag_list or ())
        star_words = set(self.star_list or ())
        synonym_words = set(self.synonym_tag_list or ())

        tags = []
        stars = []
        synonym_tags = []

        for item in keywords:
            # With ``withWeight=True`` each item is indexed as a
            # (word, weight) pair; only the word is needed.
            word = item[0]
            # Independent checks: a word may belong to more than one list.
            if word in tag_words:
                tags.append(str(word))
            if word in star_words:
                stars.append(str(word))
            if word in synonym_words:
                synonym_tags.append(str(word))

        self.tags = ",".join(tags[:k])
        self.stars = ",".join(stars[:k])
        self.synonym_tags = ",".join(synonym_tags[:k])

# a = GetContentKeyWords()
# a.tag_list = ["天安门", "北京"]
#
# a.get_keywords("我爱北京天安门,天安门上太阳升",2)
# print(a.tags)