# coding=utf-8

from __future__ import absolute_import, print_function, unicode_literals

import re
import json

from api.models import City, Province, Country
from rpc.cache import search_dic_cache


# Prefix shared by every cache entry written by HospitalNameParser; the full
# key is built as DIC_CACHE_KEY + ':' + <kind>, e.g. 'dic:city'.
DIC_CACHE_KEY = 'dic'


class HospitalNameParser(object):
    """Tokenizer for institution names / institution manager names.

    ``parse`` splits a name into an optional region prefix (city, province
    or country), a core word and an optional run of known suffixes, and
    returns the combinations of those parts as search terms.

    The region name lists are loaded once per instance in ``__init__``: they
    are read from ``search_dic_cache`` and, when the cache has no usable
    entry, from the database (the result is then cached for one hour).
    """
    SUFFIX_LIST = [
        '门诊部', '医疗', '诊所', '医院', '整形',
        '外科', '美容', '中心', '有限公司', '医学',
        '口腔', '皮肤', '集团', '中心', '机构',
        '定妆', '抗衰老', '连锁', '门诊', '综合',
        '皮肤科', '整复外科', '牙科', '研究所', '分院',
        '妇幼保健院'
    ]
    # Class-level defaults; every instance replaces them in ``__init__``.
    CITY_LIST = []
    PROVINCE_LIST = []
    COUNTRY_LIST = []

    # One or more consecutive suffixes anchored at the end of the name.
    SUFFIX_RE = re.compile(r'(%s)+$' % r'|'.join(SUFFIX_LIST))

    # Region kind -> queryset of region names. Querysets are lazy, so
    # building them here does not hit the database at import time.
    _DICT = {
        'city': City.objects.all().values_list('name', flat=True),
        'province': Province.objects.all().values_list('name', flat=True),
        'country': Country.objects.all().values_list('name', flat=True),
    }

    def __init__(self):
        self.CITY_LIST = self._get_areas_by('city')
        self.PROVINCE_LIST = self._get_areas_by('province')
        self.COUNTRY_LIST = self._get_areas_by('country')

    def _get_cache(self, key):
        """Return the JSON-decoded cache entry for ``key``.

        Returns ``None`` when the cache holds no (or an empty) value.
        """
        raw = search_dic_cache.get(DIC_CACHE_KEY + ':' + key)
        if not raw:
            return None

        return json.loads(raw)

    def _cache(self, key, time, value):
        """Store ``value`` JSON-encoded under ``key`` with a TTL of ``time``."""
        search_dic_cache.setex(
            DIC_CACHE_KEY + ':' + key, time, json.dumps(value))

    def _get_areas_by(self, t):
        """Return the list of region names of kind ``t``.

        ``t`` is one of the keys of ``_DICT`` ('city', 'province',
        'country'). The cached list is used when present and non-empty;
        otherwise the names are loaded from the database and cached for
        3600 seconds.
        """
        cached = self._get_cache(t)
        if cached:
            return cached

        # The querysets in ``_DICT`` are shared by the whole process and a
        # queryset memoises its rows after the first evaluation. ``all()``
        # returns a fresh clone, so a refill after the cache entry expired
        # really re-reads the database instead of replaying stale rows.
        names = list(self._DICT[t].all())
        self._cache(t, 3600, names)
        return names

    @staticmethod
    def _longest_region_prefix(name, regions, unit=''):
        """Return the longest prefix of ``name`` that names a region.

        For every entry of ``regions`` both ``entry + unit`` and the bare
        ``entry`` are considered. Choosing the longest candidate makes the
        result independent of the order of ``regions``. Empty entries are
        skipped because they would trivially match every name. Returns
        ``''`` when nothing matches.
        """
        best = ''
        for region in regions:
            if not region:
                continue
            for candidate in (region + unit, region):
                if len(candidate) > len(best) and name.startswith(candidate):
                    best = candidate
                    # The first candidate is the longer one for this region.
                    break
        return best

    def parse(self, name):
        """Split ``name`` and return its search terms.

        Returns a list of four strings, always in this order::

            [region + core + suffix, core, region + core, core + suffix]

        Parts that could not be detected are empty strings, so entries may
        be empty or equal to each other.
        """
        name = name.strip()
        # Drop a trailing parenthesised remark (ASCII or full-width brackets).
        trailing_paren = re.match(r'(.+)[(（].+[)）]$', name)
        if trailing_paren:
            name = trailing_paren.group(1).strip()

        # Try to cut the suffix run off the end of the name.
        m_suffix = self.SUFFIX_RE.search(name)
        if m_suffix:
            suffix = m_suffix.group(0)
            name = name[:m_suffix.start()]
        else:
            suffix = ''

        # Try to cut a city / province / country prefix, in that priority.
        region_prefix = (
            self._longest_region_prefix(name, self.CITY_LIST, '市')
            or self._longest_region_prefix(name, self.PROVINCE_LIST, '省')
            or self._longest_region_prefix(name, self.COUNTRY_LIST)
        )
        name = name[len(region_prefix):]

        term_vector = [
            region_prefix + name + suffix,  # full name
            name,  # core word
            region_prefix + name,  # region + core word
            name + suffix,  # core word + suffix
        ]

        return term_vector


class DoctorNameParser(object):
    """Parser that turns a doctor name into a list of search terms.

    A name is accepted only when, after clean-up, it consists entirely of
    characters in the range U+4E00..U+9FA5.
    """
    PATTERN = re.compile(r'[\u4e00-\u9fa5]+$')

    def parse(self, name):
        """Return ``[cleaned_name]`` when the name is accepted, else ``[]``.

        Clean-up strips surrounding whitespace and, when the name contains a
        bracketed part (ASCII or full-width brackets), keeps only the text
        in front of the last such part.
        """
        candidate = name.strip()
        bracketed = re.match(r'(.+)[(（].+[)）]', candidate)
        if bracketed is not None:
            candidate = bracketed.group(1)
        candidate = candidate.strip()
        return [candidate] if self.PATTERN.match(candidate) else []


class WikiNameParser(object):
    """Parser that turns a wiki name into a list of search terms.

    Only the beginning of the name is checked: the pattern is matched from
    the start and is not anchored at the end, so a name is accepted as soon
    as its first character is an ASCII letter, a digit or a character in
    the range U+4E00..U+9FA5.
    """
    PATTERN = re.compile(r'[0-9a-zA-Z\u4e00-\u9fa5]+')

    def parse(self, name):
        """Return ``[stripped_name]`` when the name is accepted, else ``[]``."""
        trimmed = name.strip()
        if self.PATTERN.match(trimmed) is None:
            return []
        return [trimmed]
