# -*- coding: utf-8 -*-
"""
Created on Mon Feb 25 15:11:06 2019

@author: hanye
"""

import datetime
import numpy as np

# Length of every vector built in this module: 1440 slots, i.e. one per
# minute of a 24-hour day (24 * 60).
VECTOR_LEN = 1440


def strToVector(vector_str, sep='__'):
    """
    Vector value string to vector.

    vector_str holds the vector values joined by sep. The values are
    written, in order, to the front of a zero vector of length VECTOR_LEN,
    so positions the string does not cover stay 0.

    Raises ValueError if a piece is not a valid float (this includes an
    empty vector_str) and IndexError if there are more than VECTOR_LEN
    values.
    """
    # np.float was only an alias of the builtin float and has been removed
    # from NumPy; the builtin gives the same dtype.
    vector = np.zeros(VECTOR_LEN, dtype=float)
    for idx, value_str in enumerate(vector_str.split(sep)):
        vector[idx] = float(value_str)
    return vector


def vectorToCorLst(vector, sep='__'):
    """
    Return the ascending list of integer indexes where vector is non-zero.

    sep is accepted for backward compatibility only and no longer affects
    the result: the indexes are collected directly instead of being joined
    into a string and split again. The old round trip split on a hard-coded
    '__', so any other sep failed, and a vector without non-zero entries
    failed on int(''). Such a vector now gives an empty list.
    """
    return [idx for idx in range(len(vector)) if vector[idx] != 0]


def vectorToCor(vector, sep='__'):
    """
    Vector to coordinates string.

    Collects the index of every entry of vector that is not 0, in ascending
    index order, and returns those indexes as decimal strings joined by
    sep. A vector without non-zero entries gives ''.
    """
    nonzero_positions = [
        str(pos) for pos in range(len(vector)) if vector[pos] != 0
    ]
    return sep.join(nonzero_positions)


def corStrToCorLst(vector_str, sep='__'):
    """
    Coordinates string to sorted coordinates list.

    Splits vector_str on sep, converts every piece with int() and returns
    the resulting ints in ascending order.

    Raises ValueError when a piece is not an integer literal, which
    includes an empty vector_str.
    """
    coordinates = [int(piece) for piece in vector_str.split(sep)]
    coordinates.sort()
    return coordinates


def corToVector(vector_str, sep='__'):
    """
    Coordinates string to vector.

    vector_str lists, joined by sep, the indexes (coordinates) that are set
    to 1 in an otherwise all-zero integer vector of length VECTOR_LEN.

    Parsing stops at the first piece that is not an integer and whatever
    was set up to that point is returned, so an empty vector_str gives an
    all-zero vector. An index beyond the vector bounds raises IndexError.
    """
    # np.int was only an alias of the builtin int and has been removed from
    # NumPy; the builtin gives the same dtype.
    vector = np.zeros(VECTOR_LEN, dtype=int)
    for coordinate_str in vector_str.split(sep):
        try:
            coordinate = int(coordinate_str)
        except ValueError:
            # best effort: keep what was parsed before the bad piece
            break
        vector[coordinate] = 1
    return vector


def timeStrToCor(timeStr):
    """
    Convert a time of day into a minute coordinate.

    timeStr is an 'HH:MM' or 'HH:MM:SS' string, or a datetime.time (which
    is formatted as '%H:%M:%S' first). Hours 00 to 25 are legal. The
    coordinate counts minutes from 02:00, and a time before 02:00 is moved
    24 hours later, so '02:00:00' maps to 0 and '00:00:00' maps to the same
    coordinate as '24:00:00'. Seconds of 30 or more round the minute up.

    Returns None when timeStr is None. When the hour is above 25 or the
    minute / second is above 59, a message is printed and None is returned.
    """
    illegalMsg = '00:00:00 to 25:59:59 is leagel, otherwise is illegal.'
    if timeStr is None:
        return None

    # datetime.time has no split(); isinstance also covers subclasses of
    # datetime.time, which an exact type comparison would let through to
    # the split() below.
    if isinstance(timeStr, datetime.time):
        timeStr = timeStr.strftime('%H:%M:%S')

    timeStrLst = timeStr.split(':')
    hourNum = int(timeStrLst[0])
    if hourNum > 25:
        print(illegalMsg)
        return None
    minuteNum = int(timeStrLst[1])
    if minuteNum > 59:
        print(illegalMsg)
        return None
    if len(timeStrLst) == 3:
        secondNum = int(timeStrLst[2])
        if secondNum > 59:
            print(illegalMsg)
            return None
        if secondNum >= 30:
            # round to the nearest minute
            # NOTE(review): '25:59:30' and later rounds up to 24 * 60,
            # one past the coordinate of '25:59' - confirm callers expect
            # that value.
            minuteNum += 1
    vectCordinate = (hourNum - 2) * 60 + minuteNum
    if vectCordinate < 0:
        # times before 02:00 belong to the end of the window
        vectCordinate += 24 * 60
    return vectCordinate


def form_people_type_str(sex, aged, edu):
    """
    Build the people-type key '<sex>__<aged>__<edu>'.

    Arguments that are ints are converted with str(); anything else is
    handed to the join unchanged, so it has to be a string already.
    """
    parts = [str(field) if isinstance(field, int) else field
             for field in (sex, aged, edu)]
    return '__'.join(parts)


def form_house_people_type(sex, aged, edu, house_type, house_count=None,
                           pid=None):
    """
    Build the household + people-type key
    '<house_count>__<sex>__<aged>__<edu>'.

    house_count is used when it is given. Otherwise it is looked up in
    house_type, a mapping keyed by house id, using the first 8 characters
    of pid as the house id; pid is required in that case. sex, aged and
    edu are converted with str() when they are ints.
    """
    if house_count is None:
        house_id = pid[0:8]
        house_count = house_type[house_id]
    # Stringify on both paths: an explicitly passed int house_count used to
    # reach the join unconverted and raise TypeError.
    house_count = str(house_count)
    if isinstance(sex, int):
        sex = str(sex)
    if isinstance(aged, int):
        aged = str(aged)
    if isinstance(edu, int):
        edu = str(edu)
    return '__'.join([house_count, sex, aged, edu])


def format_raw_line(raw_line, headerLst=None, sep='\t'):
    """
    Parse one raw data line into a dict.

    The stripped line is split on sep and zipped with headerLst (default:
    pid, wei, sex, aged, edu, ch, minutes); a line with fewer fields than
    headers simply lacks the trailing keys. The coordinate string is taken
    from the 'minutes' column or, failing that, the 'st' column.

    Keys set on the returned dict besides the raw columns:
      minutes / vector  - vector built by corToVector, or None when the
                          line has no coordinate column
      minutes_rawStr    - the raw coordinate string ('' when absent)
      minutes_first     - smallest coordinate (0 when absent)
      minutes_last      - largest coordinate (0 when absent)
      weight            - float of the 'wei' (or 'weight') column; the
                          'wei' key itself is removed
      hid, cityid       - first 8 / first 3 characters of pid
      ch, BrandID, platform - default to '' when the column is missing

    Raises KeyError when there is no 'pid' column, and ValueError when a
    coordinate column is present but holds a non-integer piece.
    """
    line_Lst = raw_line.strip().split(sep)
    if headerLst is None:
        headerLst = ['pid', 'wei', 'sex', 'aged', 'edu', 'ch', 'minutes']
    line_dict = dict(zip(headerLst, line_Lst))

    if 'minutes' in line_dict:
        rawVectStr = line_dict['minutes']
    elif 'st' in line_dict:
        rawVectStr = line_dict['st']
    else:
        rawVectStr = None

    if rawVectStr is not None:
        vector = corToVector(rawVectStr)
        # Use the string that was actually found: with an 'st' column there
        # is no line_dict['minutes'] to read the coordinates from.
        corLst = corStrToCorLst(rawVectStr)
        vectorMin = int(min(corLst))
        vectorMax = int(max(corLst))
    else:
        rawVectStr = ''
        vector = None
        vectorMin = 0
        vectorMax = 0

    line_dict.update({'minutes_first': vectorMin, 'minutes_last': vectorMax})
    if 'ch' not in line_dict:
        line_dict['ch'] = ''
    line_dict['minutes_rawStr'] = rawVectStr
    line_dict['minutes'] = vector
    line_dict['vector'] = line_dict['minutes']
    if 'wei' in line_dict:
        line_dict['weight'] = float(line_dict['wei'])
    elif 'weight' in line_dict:
        line_dict['weight'] = float(line_dict['weight'])
    line_dict.pop('wei', None)
    # house_id is the first 8 digits of pid
    line_dict['hid'] = line_dict['pid'][:8]
    line_dict['cityid'] = line_dict['pid'][:3]
    if 'BrandID' not in line_dict:
        line_dict['BrandID'] = ''
    if 'platform' not in line_dict:
        line_dict['platform'] = ''
    return line_dict


# def format_lines_from_one_file(raw_file_path, raw_file_name, sep='\t'):
#     lines_Lst = []
#     rf = open(raw_file_path + '/' + raw_file_name, 'r', encoding='utf-8')
#     first_l = rf.readline()
#     if 'pid' in first_l:
#         headerLst = first_l.strip().split(sep)
#     else:
#         headerLst = None
#         line_dict = format_raw_line(first_l, headerLst)
#         lines_Lst.append(line_dict)
#     for raw_line in rf:
#         line_dict = format_raw_line(raw_line, headerLst)
#         lines_Lst.append(line_dict)
#     rf.close()
#     return lines_Lst


def format_lines_from_one_file(raw_file_path, raw_file_name, sep='\t'):
    """
    Lazily yield one parsed dict (see format_raw_line) per data line of
    the file raw_file_path + '/' + raw_file_name.

    When the first line contains 'pid' it is taken as the header row: its
    sep-separated names become the column names and the line itself is not
    yielded. Otherwise format_raw_line's default columns are used and the
    first line is parsed like any other line.

    The file is opened once, inside a with block, so it is closed when the
    generator finishes or is closed. sep is also passed on to
    format_raw_line, so header and data lines are split the same way.
    """
    full_path = raw_file_path + '/' + raw_file_name
    with open(full_path, 'r', encoding='utf-8') as rf:
        first_l = rf.readline()
        if 'pid' in first_l:
            headerLst = first_l.strip().split(sep)
        else:
            headerLst = None
            # readline() returns '' for an empty file: nothing to parse
            if first_l:
                yield format_raw_line(first_l, headerLst, sep=sep)
        for raw_line in rf:
            yield format_raw_line(raw_line, headerLst, sep=sep)


def PIV_from_one_file(raw_file_path, raw_file_name, house_type=None):
    """
    Build the weighted average vectors per channel and people type from
    one raw file.

    Every line of the file is parsed with format_lines_from_one_file. The
    people type of a line is form_people_type_str(sex, aged, edu), or, when
    house_type is given, form_house_people_type(...) looked up with the
    line's own pid. The weight of each pid is counted once (first
    occurrence) towards the total, including pids whose line has no
    vector; such lines are otherwise skipped.

    Returns a tuple (channel_people_averageVector_dict, weight_sum) where
    channel_people_averageVector_dict[channel][people_type] is the sum of
    vector * weight * 1e3 over the matching lines divided by weight_sum.
    """
    lines_Lst = format_lines_from_one_file(raw_file_path, raw_file_name)

    # form channel-people dict
    channel_peopleLst_dict = {}
    weight_sum_dict = {}
    for lined in lines_Lst:
        # pid has to be read before the people type is built: the
        # house-based key is looked up by this line's pid.
        pid = lined['pid']
        if house_type is not None:
            people_type = form_house_people_type(sex=lined['sex'],
                                                 aged=lined['aged'],
                                                 edu=lined['edu'],
                                                 house_type=house_type,
                                                 pid=pid)
        else:
            people_type = form_people_type_str(lined['sex'],
                                               lined['aged'],
                                               lined['edu'])
        channel = lined['ch']
        vector = lined['minutes']
        weight = lined['weight']
        # update weight sum
        if pid not in weight_sum_dict:
            weight_sum_dict[pid] = weight
        if 'ch' not in lined or vector is None:
            # skip pid without view behavior, after weight sum
            continue

        # update channel_peopleLst_dict
        channel_peopleLst_dict.setdefault(channel, {}).setdefault(
            people_type, []).append({'vector': vector,
                                     'weight': weight,
                                     'pid': pid})

    # total weight
    # NOTE(review): the sum is rounded to an integer before it is scaled by
    # 1e3, while each vector is scaled by its unrounded weight * 1e3 -
    # confirm the rounding is meant to happen before the scaling.
    weight_sum = int(round(sum(weight_sum_dict.values())) * 1e3)

    channel_people_averageVector_dict = {}
    for ch, people_dict in channel_peopleLst_dict.items():
        averaged = {}
        for peo, vect_metas in people_dict.items():
            total = 0
            for vect_meta in vect_metas:
                total += vect_meta['vector'] * vect_meta['weight'] * 1e3
            averaged[peo] = total / weight_sum
        channel_people_averageVector_dict[ch] = averaged

    return (channel_people_averageVector_dict, weight_sum)


def PIV_model_gen(date_str, raw_file_path, house_type=None):
    """
    Build the PIV model of one day.

    date_str is accepted as 'yymmdd', 'yyyymmdd' or 'yyyy-mm-dd' (tried in
    that order). The raw file '<yymmdd>_Minutes.csv' under raw_file_path is
    processed with PIV_from_one_file.

    Returns a tuple ({'yyyy-mm-dd': piv_dict}, weight_sum). When date_str
    matches none of the formats a message is printed and None is returned.

    All three formats now lead to the model being built; previously only
    'yyyy-mm-dd' did, and the two compact formats parsed and then fell
    through to an implicit None.
    """
    date_T = None
    for date_format in ('%y%m%d', '%Y%m%d', '%Y-%m-%d'):
        try:
            date_T = datetime.datetime.strptime(date_str, date_format)
        except ValueError:
            continue
        break
    if date_T is None:
        print('Ill formatted date string %s' % date_str)
        return None

    date_str_iso = date_T.isoformat()[:10]
    # file names carry the date as yymmdd
    date_str = date_str_iso.replace('-', '')[2:]
    raw_fn = '%s_Minutes.csv' % date_str
    piv_dict, weight_sum = PIV_from_one_file(raw_file_path, raw_fn,
                                             house_type=house_type)
    date_channel_people_averVect = {date_str_iso: piv_dict}

    return date_channel_people_averVect, weight_sum


# Manual test: when the file is run as a script, build the model of one day
# from a local data directory.
if __name__ == '__main__':
    date_str = '2018-01-02'
    # machine-specific Windows path; adjust before running elsewhere
    raw_file_path = 'D:\\CSM\\code_repo\\PIV\\data\\PIV_CD'
    channel_people_averageVector_dict, weight_sum = PIV_model_gen(
        date_str, raw_file_path)