1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import jieba
import jieba.analyse
class GetContentKeyWords(object):
    """Pick configured tag / star / synonym-tag words out of a piece of text.

    The three word lists (``tag_list``, ``star_list``, ``synonym_tag_list``)
    are plain public attributes and may be replaced by the caller at any time;
    they are re-read on every call.  Results of :meth:`get_keywords` are stored
    on the instance as comma-joined strings in ``tags``, ``stars`` and
    ``synonym_tags``.
    """

    def __init__(self):
        # NOTE: the attribute name is misspelled ("jeiba"); it is kept as-is
        # because it is publicly reachable.  It holds the jieba *module*, so
        # words registered through it are registered module-wide, not per
        # instance.
        self.jeiba = jieba
        # Default tag vocabulary.
        self.tag_list = [
            "瘦脸针kyc", "双眼皮kyc", "水光针kyc", "玻尿酸kyc", "吸脂kyc",
            "祛痘kyc", "鼻综合kyc", "光子嫩肤kyc", "没有想法kyc",
            "牙齿kyc", "抗衰紧致kyc", "胸部kyc", "果酸换肤kyc", "祛斑kyc",
            "美白针kyc", "瘦腿针kyc",
        ]
        self.star_list = []
        self.synonym_tag_list = []
        # Outputs of the most recent get_keywords() call (comma-joined).
        self.tags = ""
        self.stars = ""
        self.synonym_tags = ""

    def _word_lists(self):
        """Return the three configurable word lists in registration order."""
        return (self.tag_list, self.star_list, self.synonym_tag_list)

    @staticmethod
    def _join_matches(keywords, words, k):
        """Return the first ``k`` keywords that occur in ``words``, comma-joined.

        ``keywords`` is a sequence of items whose first element is the word.
        ``words`` may be ``None`` or empty, in which case nothing matches.
        """
        # Build the lookup set once instead of scanning a list per keyword.
        wanted = set(words or ())
        hits = [str(item[0]) for item in keywords if item[0] in wanted]
        return ",".join(hits[:k])

    def add_tag_word(self):
        """Register every word of the three word lists via ``add_word``.

        Lists that are empty or ``None`` are skipped.  Called on every
        ``get`` / ``get_keywords`` so that lists changed by the caller after
        construction are picked up.
        """
        add_word = self.jeiba.add_word
        for words in self._word_lists():
            for word in words or ():
                add_word(word)

    def get(self, content):
        """Register the configured words, then return ``cut(content)``."""
        self.add_tag_word()
        return self.jeiba.cut(content)

    def get_keywords(self, content, k=None):
        """Extract keywords from ``content`` and store the configured matches.

        Keywords are obtained from ``analyse.extract_tags`` (at most 9999 of
        them).  Those present in ``tag_list``, ``star_list`` and
        ``synonym_tag_list`` are stored, in the order the extractor returned
        them, as comma-joined strings in ``self.tags``, ``self.stars`` and
        ``self.synonym_tags`` respectively.

        :param content: text to analyse.
        :param k: upper slice bound applied to each match list
            (``matches[:k]``); ``None`` keeps every match.
        :return: ``None`` -- results are exposed through the attributes above.
        """
        self.add_tag_word()
        keywords = self.jeiba.analyse.extract_tags(
            content, topK=9999, withWeight=True, allowPOS=())
        self.tags = self._join_matches(keywords, self.tag_list, k)
        self.stars = self._join_matches(keywords, self.star_list, k)
        self.synonym_tags = self._join_matches(
            keywords, self.synonym_tag_list, k)
# Example usage:
# a = GetContentKeyWords()
# a.tag_list = ["天安门", "北京"]
#
# a.get_keywords("我爱北京天安门,天安门上太阳升", 2)
# print(a.tags)