Commit 1e5e7374, authored by crazyer

text miner

parent d898fad8
...@@ -22,7 +22,7 @@ class StopwordsFilter(Filter): ...@@ -22,7 +22,7 @@ class StopwordsFilter(Filter):
super(StopwordsFilter, self).__init__(file_path, encoding) super(StopwordsFilter, self).__init__(file_path, encoding)
self.init() self.init()
def remove_irregular_chars(self, corpus: str): def remove_irregular_chars(self, corpus):
return re.sub(u"([^\u4e00-\u9fa5\u0030-\u0039\u0041-\u005a\u0061-\u007a])", "", corpus) return re.sub(u"([^\u4e00-\u9fa5\u0030-\u0039\u0041-\u005a\u0061-\u007a])", "", corpus)
def init(self): def init(self):
......
...@@ -22,7 +22,7 @@ class StopwordsFilter(Filter): ...@@ -22,7 +22,7 @@ class StopwordsFilter(Filter):
super(StopwordsFilter, self).__init__(file_path, encoding) super(StopwordsFilter, self).__init__(file_path, encoding)
self.init() self.init()
def remove_irregular_chars(self, corpus: str): def remove_irregular_chars(self, corpus):
return re.sub(u"([^\u4e00-\u9fa5\u0030-\u0039\u0041-\u005a\u0061-\u007a])", "", corpus) return re.sub(u"([^\u4e00-\u9fa5\u0030-\u0039\u0041-\u005a\u0061-\u007a])", "", corpus)
def init(self): def init(self):
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment.