# -*- coding: utf-8 -*-
"""
Created on Fri Feb 23 16:37:14 2018

Process one CSV line at a time, extracting the field values from the
string and returning them as a dict keyed by the header names.

@author: hanye
"""

import csv
import re


def form_dict_from_csv_line(line_str, header_Lst):
    """Parse a single CSV line into a dict keyed by the header names.

    The line is parsed with the standard ``csv`` reader, so fields may be
    quoted, unquoted, or a mix of both; commas inside a quoted field are
    kept as part of the value, and a doubled quote (``""``) inside a
    quoted field is decoded to a single ``"``.

    Leading and trailing whitespace of the whole line (including the line
    terminator) is stripped before parsing; whitespace inside individual
    fields is left untouched.

    Args:
        line_str: One line of CSV text, with or without a trailing newline.
        header_Lst: Sequence of column names, in column order.

    Returns:
        A dict mapping each header to the corresponding field value, or
        ``None`` when the line cannot be parsed or its number of fields
        does not match ``len(header_Lst)``.
    """
    try:
        fields = next(csv.reader([line_str.strip()]), [])
    except csv.Error:
        # Malformed input (e.g. a bare newline in an unquoted field):
        # report it the same way as any other unusable line.
        return None

    if not fields:
        # csv.reader yields an empty row for a blank line, whereas
        # ''.split(',') gives one empty field; keep the latter so a blank
        # line still maps onto a single-column header.
        fields = ['']

    if len(fields) != len(header_Lst):
        return None

    return dict(zip(header_Lst, fields))
