﻿# -*- coding: utf-8 -*-
"""
Created on Thu Oct  5 15:28:45 2017

Calculate missing data of monthly_net_inc values.

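Modified to use redis for calculating the monthly net increment.
On the 3rd of each month, the slice data of the month before last is written
into redis, and then the monthly net increment of last month is calculated.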
 
@author: hanye
"""

import datetime
import logging
import argparse
import sys
from func_cal_NI_by_redis import func_cal_increment
from func_cal_NI_by_redis import func_cal_new_released_NI
#from func_calculate_monthly_net_inc import cal_monthly_net_inc
from func_monthly_aggregated_daily_url_for_one_fetch_day import define_monthly_data_slice_type
#from func_data_correction_for_nag_MNI import data_correction
import task_stats

# Command-line interface of the monthly net-increment job.
# Every option is optional; see the individual help strings for their meaning.
parser = argparse.ArgumentParser(description=('Calculate this monthly or last month '
                                              'by passed-in parameters.'))
# NOTE: the help text must be ONE string. A stray comma between the adjacent
# literals turns it into a tuple, which makes argparse raise as soon as the
# help screen is rendered (-h / format_help()).
# NOTE(review): --cal_month is parsed but not read anywhere in the visible
# script -- confirm whether it is still needed.
parser.add_argument('-c', '--cal_month',
                    help=('if given "this_month", will calculate data for this month,'
                          ' that\'s the same month of the running time, '
                          'or, will calculate data for the previous month.'))
# 'N' -> only the new-released part, 'O' -> only the observed part,
# omitted -> both parts.
parser.add_argument('-t', '--ctype',
                    help=('will decide cal new_released or obversded,like N or O'))
# Reference date in %Y-%m-%d form; overrides the run day when given.
parser.add_argument('-d', '--data_str',
                    help=('''like 2019-2-22,will cal index:short-video-production-2019
                          doc_type: daily-url-2019-01-31'''),
                    default=None)
# May be repeated (-p a -p b); the collected list is forwarded as limit_platform.
parser.add_argument('-p', '--platform_list',  action='append', default=None)

# Any non-empty value shifts the reference day to the 10th of next month.
parser.add_argument('-n', '--addMonth', default=None)
# Redis database number forwarded to func_cal_increment as redis_db.
parser.add_argument('-redis', '--redis',default=10, type=int,
                    help=(' redis_num'))

# Parse the command line once; every later step reads its options from `args`.
args = parser.parse_args()

# Pick the reference day: normally "now", or the 10th of the following month
# when --addMonth was supplied with a non-empty value.
if not args.addMonth:
    runday = datetime.datetime.now()
else:
    runday_ = datetime.datetime.now()
    # divmod maps December (12) to (1, 0) and any other month m to (0, m),
    # i.e. a year carry plus a zero-based index of the following month.
    year_carry, month_index = divmod(runday_.month, 12)
    new_year = runday_.year + year_carry
    new_month = month_index + 1
    runday = datetime.datetime(new_year, new_month, 10)


# Resolve the month to calculate ("cal") and the month before it ("last"),
# both relative to the reference day `runday`, then derive the index names and
# monthly doc types used by the calculation steps further down.
if args.data_str is not None:
    # An explicit -d date replaces the reference day chosen earlier.
    try:
        runday = datetime.datetime.strptime(args.data_str, '%Y-%m-%d')
    except ValueError:
        # Malformed -d value: report it and stop with a failure status so the
        # caller (cron, shell) can tell the run did not happen.
        print('error -d ')
        sys.exit(1)

now_cal_date_str = runday
now_cal_month = runday.month
if now_cal_month == 1:
    # Run in January: calculate December, compare against November,
    # both belonging to the previous year.
    cal_month = 12
    last_cal_month = 11
    last_cal_year = cal_year = runday.year - 1
elif now_cal_month == 2:
    # Run in February: calculate January of this year, compare against
    # December of the previous year.
    cal_month = 1
    last_cal_month = 12
    last_cal_year = runday.year - 1
    cal_year = runday.year
else:
    last_cal_year = cal_year = runday.year
    cal_month = now_cal_month - 1
    last_cal_month = now_cal_month - 2

# One index per year.
index = 'short-video-production-' + str(cal_year)
last_index = 'short-video-production-' + str(last_cal_year)

# NOTE(review): define_monthly_data_slice_type is defined elsewhere; it is
# unpacked here as (doc type name, datetime of the month's last day) --
# confirm against its implementation.
doc_type_monthly, last_day_of_this_month_T = define_monthly_data_slice_type(
    cal_year, cal_month, runday.day)
last_doc_type_monthly, last_last_day_of_this_month_T = define_monthly_data_slice_type(
    last_cal_year, last_cal_month, runday.day)
first_day_in_next_month_T = last_day_of_this_month_T + datetime.timedelta(days=1)

year_start = last_day_of_this_month_T.year
month_start = last_day_of_this_month_T.month
# e.g. 'Jan2019'; used in the log file name.
cal_month_str = datetime.datetime.strftime(last_day_of_this_month_T, '%b%Y')
cal_month_T = last_day_of_this_month_T

# Decide which parts of the increment are calculated:
#   no --ctype -> both, 'O' -> observed only, 'N' -> new-released only.
if args.ctype is None:
    new_released = True
    obversed = True
elif args.ctype == 'O':
    new_released = False
    obversed = True
elif args.ctype == 'N':
    new_released = True
    obversed = False
else:
    # Unknown value: stop here, because neither flag has been set and the
    # later `if obversed:` / `if new_released:` checks would fail with a
    # NameError after the task has already been recorded as started.
    print('error in --ctype')
    sys.exit(1)
# define logger
# NOTE(review): no handler is attached to this logger in the visible script and
# it is not used below; all log output goes through print(..., file=log_file).
logger_name = 'calculate_MNI_by_redis'
logger = logging.getLogger(logger_name)
logger.setLevel(logging.DEBUG)
# create handler
path = ('/home/hanye/project_data/Python/Projects/proj-short-videos/'
        'write-data-into-es/log/')


# One log file per run; ':' in the ISO timestamp is replaced so the name stays
# file-system friendly.
log_fn = ('calculate_MNI_for_%s_by_redis_on_%s_log'
          % (cal_month_str, datetime.datetime.now().isoformat().replace(':', '-')))
# Stays open for the calculation steps below, which receive it as log_f.
log_file = open(path + log_fn, 'w')
print('start: \nthis-doc-type:', doc_type_monthly, '\nlast_doctype:',
      last_doc_type_monthly, '\nthis-index:', index, '\nlast-index:', last_index)
print('wiil cal MNI on index: %s - %s'%(doc_type_monthly, datetime.datetime.now()), file=log_file)
print('cal index : %s  %s'%(doc_type_monthly, datetime.datetime.now()), file=log_file)
# The slice written into redis is the PREVIOUS month's one, so log its doc type
# (the original logged the current month's doc type here).
print('last  index need write into redis : %s  %s'%(last_doc_type_monthly, datetime.datetime.now()), file=log_file)

# The increment is calculated in two parts: historical videos (not newly
# released) and newly released videos.
# 1. Historical part.
# Restrict to data whose release time lies within the 60 days before the first
# day of the month being calculated.
_one_day = datetime.timedelta(days=1)
_look_back = datetime.timedelta(days=60)
ob_release_time_end_dt = last_last_day_of_this_month_T + _one_day
ob_release_time_start_dt = ob_release_time_end_dt - _look_back
# Epoch timestamps in milliseconds.
ob_release_time_start_ts = int(ob_release_time_start_dt.timestamp()*1000)
ob_release_time_end_ts = int(ob_release_time_end_dt.timestamp()*1000)

# Record the task in the ES task-stats index before the calculation starts.
# Timestamps are epoch milliseconds; the start time is taken from `runday`.
task_start_ts = int(runday.timestamp()*1e3)
_start_record = {
    'task_name': 'calculate_monthly_net_inc_by_redis_monthly_task',
    'program_file_name': 'calculate_monthly_net_inc_for_by_redis_monthly_task.py',
    'task_freq': 'monthly',
    'start_time': task_start_ts,
    'time_of_processed_data': int(cal_month_T.timestamp()*1e3),
    'with_task_description': False,
}
task_stats.record_task_stats(**_start_record)
# Calculate the observed (historical) part.
if obversed:
    print('cal ob')
    print('will cal obversed *******',datetime.datetime.now(), file=log_file)
    # Previous month's slice versus the calculated month's slice, limited to
    # the release-time window computed above.
    _observed_kwargs = dict(
        index_last=last_index,
        doc_type_last=last_doc_type_monthly,
        cal_type='M',
        index_now=index,
        doc_type_now=doc_type_monthly,
        release_time_st_last=ob_release_time_start_ts,
        release_time_et_last=ob_release_time_end_ts,
        redis_db=args.redis,
        log_f=log_file,
        limit_platform=args.platform_list,
    )
    func_cal_increment(**_observed_kwargs)

# 2. Calculate the newly released part.
if new_released:
    print('cal new')
    # Release-time window: starts where the observed window ends and runs up
    # to the first day of the month after the calculated one.
    nw_release_time_start_dt = ob_release_time_end_dt
    nw_release_time_end_dt = first_day_in_next_month_T
    # Epoch timestamps in milliseconds.
    nw_release_time_start_ts = int(nw_release_time_start_dt.timestamp()*1000)
    nw_release_time_end_ts = int(nw_release_time_end_dt.timestamp()*1000)
    print('will cal new_released *******',datetime.datetime.now(), file=log_file)
    _new_released_kwargs = dict(
        cal_type='M',
        index_now=index,
        doc_type_now=doc_type_monthly,
        release_time_st_now=nw_release_time_start_ts,
        release_time_et_now=nw_release_time_end_ts,
        log_f=log_file,
        limit_platform=args.platform_list,
    )
    func_cal_new_released_NI(**_new_released_kwargs)




print('All done.', datetime.datetime.now(), file=log_file)
# Nothing below writes to the log, so close it now to flush the buffered
# output and release the file handle (it was never closed before).
log_file.close()

# write task status into task-stats index when ends
# Timestamps are epoch milliseconds; start_time repeats the value recorded
# when the task began.
task_end_ts = int(datetime.datetime.now().timestamp()*1e3)
task_stats.record_task_stats(
    task_name='calculate_monthly_net_inc_by_redis_monthly_task',
    program_file_name='calculate_monthly_net_inc_for_by_redis_monthly_task.py',
    task_freq='monthly',
    start_time=task_start_ts,
    time_of_processed_data=int(cal_month_T.timestamp()*1e3),
    end_time=task_end_ts,
    is_done=True,
    task_stats='Done',
    with_task_description=False
    )
