Commit e746dd1a authored by 王志伟

数据指标波动假设检验统计

parent 31afa316
...@@ -18,6 +18,7 @@ def get_yesterday_date(): ...@@ -18,6 +18,7 @@ def get_yesterday_date():
yesterday = yesterday.strftime("%Y-%m-%d") yesterday = yesterday.strftime("%Y-%m-%d")
return yesterday return yesterday
yesterday=get_yesterday_date yesterday=get_yesterday_date
print(yesterday)
#自动获取10日前的日期 #自动获取10日前的日期
def get_somedate(): def get_somedate():
#自动获取10日前的日期,如"2018-07-28" #自动获取10日前的日期,如"2018-07-28"
...@@ -29,127 +30,128 @@ def get_somedate(): ...@@ -29,127 +30,128 @@ def get_somedate():
someday = someday.strftime("%Y-%m-%d") someday = someday.strftime("%Y-%m-%d")
return someday return someday
ten_days=get_somedate() ten_days=get_somedate()
print(ten_days)
#获取最近10天的数据
def DATA_recently(x,y,z,q): # #获取最近10天的数据
sql_cid = "select {0}/{1} as {2} from {3} \ # def DATA_recently(x,y,z,q):
where stat_date >= ten_days group by stat_date".format(x,y,z,q) # sql_cid = "select {0}/{1} as {2} from {3} \
CVR_DATA_recently = con_sql(sql_cid)[0][0] # where stat_date >= ten_days group by stat_date".format(x,y,z,q)
return CVR_DATA_recently # CVR_DATA_recently = con_sql(sql_cid)[0][0]
# return CVR_DATA_recently
#获取固定时间的10天的数据 #
def DATA_fixed(x,y,z,q): # #获取固定时间的10天的数据
sql_cid = "select {0}/{1} as {2} from {3} \ # def DATA_fixed(x,y,z,q):
where stat_date >= " " and stat_date<" " group by stat_date".format(x,y,z,q) # sql_cid = "select {0}/{1} as {2} from {3} \
CVR_DATA_fixed = con_sql(sql_cid)[0][0] # where stat_date >= " " and stat_date<" " group by stat_date".format(x,y,z,q)
return CVR_DATA_fixed # CVR_DATA_fixed = con_sql(sql_cid)[0][0]
# return CVR_DATA_fixed
#新用户cvr #
x_crv_new=DATA_recently("diary_meigou_newUser","diary_clk_newUser","CVR_new","diary_meigou_crv") # #新用户cvr
y_crv_new=DATA_fixed("diary_meigou_newUser","diary_clk_newUser","CVR_new","diary_meigou_crv") # x_crv_new=DATA_recently("diary_meigou_newUser","diary_clk_newUser","CVR_new","diary_meigou_crv")
# y_crv_new=DATA_fixed("diary_meigou_newUser","diary_clk_newUser","CVR_new","diary_meigou_crv")
#老用户cvr #
x_crv_old=DATA_recently("diary_meigou_oldUser","diary_clk_oldUser","CVR_old","diary_meigou_crv") # #老用户cvr
y_crv_old=DATA_fixed("diary_meigou_oldUser","diary_clk_oldUser","CVR_old","diary_meigou_crv") # x_crv_old=DATA_recently("diary_meigou_oldUser","diary_clk_oldUser","CVR_old","diary_meigou_crv")
# y_crv_old=DATA_fixed("diary_meigou_oldUser","diary_clk_oldUser","CVR_old","diary_meigou_crv")
#新用户ct-cvr #
x_ctcrv_new=DATA_recently("diary_meigou_newUser","diary_exp_newUser","CT_CVR_new","diary_meigou_crv") # #新用户ct-cvr
y_ctcrv_new=DATA_fixed("diary_meigou_newUser","diary_exp_newUser","CT_CVR_new","diary_meigou_crv") # x_ctcrv_new=DATA_recently("diary_meigou_newUser","diary_exp_newUser","CT_CVR_new","diary_meigou_crv")
# y_ctcrv_new=DATA_fixed("diary_meigou_newUser","diary_exp_newUser","CT_CVR_new","diary_meigou_crv")
#老用户ct-cvr #
x_ctcrv_old=DATA_recently("diary_meigou_oldUser","diary_exp_oldUser","CT_CVR_old","diary_meigou_crv") # #老用户ct-cvr
y_ctcrv_old=DATA_fixed("diary_meigou_oldUser","diary_exp_oldUser","CT_CVR_old","diary_meigou_crv") # x_ctcrv_old=DATA_recently("diary_meigou_oldUser","diary_exp_oldUser","CT_CVR_old","diary_meigou_crv")
# y_ctcrv_old=DATA_fixed("diary_meigou_oldUser","diary_exp_oldUser","CT_CVR_old","diary_meigou_crv")
#新用户ctr(page_view) #
x_ctr_new=DATA_recently("clk_count_newUser_all","imp_count_newUser_all","ctr_new","bug_Recommendation_strategy_newUser") # #新用户ctr(page_view)
y_ctr_new=DATA_fixed("clk_count_newUser_all","imp_count_newUser_all","ctr_new","bug_Recommendation_strategy_newUser") # x_ctr_new=DATA_recently("clk_count_newUser_all","imp_count_newUser_all","ctr_new","bug_Recommendation_strategy_newUser")
# y_ctr_new=DATA_fixed("clk_count_newUser_all","imp_count_newUser_all","ctr_new","bug_Recommendation_strategy_newUser")
#老用户ctr(page_view) #
x_ctr_old=DATA_recently("clk_count_oldUser_all","imp_count_oldUser_all","ctr_old","bug_Recommendation_strategy_temp") # #老用户ctr(page_view)
y_ctr_old=DATA_fixed("clk_count_oldUser_all","imp_count_oldUser_all","ctr_old","bug_Recommendation_strategy_temp") # x_ctr_old=DATA_recently("clk_count_oldUser_all","imp_count_oldUser_all","ctr_old","bug_Recommendation_strategy_temp")
# y_ctr_old=DATA_fixed("clk_count_oldUser_all","imp_count_oldUser_all","ctr_old","bug_Recommendation_strategy_temp")
#新用户ctr(on_click_diary_card) #
x_ctr_new_o=DATA_recently("clk_count_newUser_all_a","imp_count_newUser_all","ctr_new","on_click_diary_card") # #新用户ctr(on_click_diary_card)
y_ctr_new_o=DATA_fixed("clk_count_newUser_all_a","imp_count_newUser_all","ctr_new","on_click_diary_card") # x_ctr_new_o=DATA_recently("clk_count_newUser_all_a","imp_count_newUser_all","ctr_new","on_click_diary_card")
# y_ctr_new_o=DATA_fixed("clk_count_newUser_all_a","imp_count_newUser_all","ctr_new","on_click_diary_card")
#老用户ctr(on_click_diary_card) #
x_ctr_old_o=DATA_recently("clk_count_oldUser_all_a","imp_count_oldUser_all","ctr_old","on_click_diary_card") # #老用户ctr(on_click_diary_card)
y_ctr_old_o=DATA_fixed("clk_count_oldUser_all_a","imp_count_oldUser_all","ctr_old","on_click_diary_card") # x_ctr_old_o=DATA_recently("clk_count_oldUser_all_a","imp_count_oldUser_all","ctr_old","on_click_diary_card")
# y_ctr_old_o=DATA_fixed("clk_count_oldUser_all_a","imp_count_oldUser_all","ctr_old","on_click_diary_card")
#
#
#
def t_test(x,y): #进行t检验 #
# def t_test(x,y): #进行t检验
#策略前的数据,赋值给x,策略后的数据赋值给y,均采用10日内数据 #
x=[2,4,2,3,4,2,3] # #策略前的数据,赋值给x,策略后的数据赋值给y,均采用10日内数据
y=[4,5,6,3,4,5,6] # x=[2,4,2,3,4,2,3]
# y=[4,5,6,3,4,5,6]
#检验数据方差是否齐性 #
a=levene(x,y) # #检验数据方差是否齐性
p_value=a[1] #结果若p_value>0.05,则认为两组数据方差是相等的,否则两组数据方差是不等的 # a=levene(x,y)
# p_value=a[1] #结果若p_value>0.05,则认为两组数据方差是相等的,否则两组数据方差是不等的
if p_value>0.05: #认为数据方差具有齐性,equal_var=ture #
t_test=ttest_ind(x,y,equal_var=True) # if p_value>0.05: #认为数据方差具有齐性,equal_var=ture
t_p_value=t_test[1] # t_test=ttest_ind(x,y,equal_var=True)
if t_p_value>0.05: # t_p_value=t_test[1]
print("策略前后两组数据无显著性差异,即该指标没有显著提升,p_value:%f" % t_p_value) # if t_p_value>0.05:
else: # print("策略前后两组数据无显著性差异,即该指标没有显著提升,p_value:%f" % t_p_value)
print("策略前后两组数据有显著性差异,即该指标获得显著提升,p_value:%f" % t_p_value) # else:
else: #认为数据方差不具有齐性,equal_var=false # print("策略前后两组数据有显著性差异,即该指标获得显著提升,p_value:%f" % t_p_value)
t_test = ttest_ind(x, y, equal_var=False) # else: #认为数据方差不具有齐性,equal_var=false
t_p_value = t_test[1] # t_test = ttest_ind(x, y, equal_var=False)
if t_p_value > 0.05: # t_p_value = t_test[1]
print("策略前后两组数据无显著性差异,即该指标没有显著提升,p_value:%f" % t_p_value) # if t_p_value > 0.05:
else: # print("策略前后两组数据无显著性差异,即该指标没有显著提升,p_value:%f" % t_p_value)
print("策略前后两组数据有显著性差异,即该指标获得显著提升,p_value:%f" % t_p_value) # else:
# print("策略前后两组数据有显著性差异,即该指标获得显著提升,p_value:%f" % t_p_value)
###假设检验,判断是否具有显著性 #
# ###假设检验,判断是否具有显著性
#新用户cvr假设检验 #
crv_new_ttest=t_test(x_crv_new,y_crv_new) # #新用户cvr假设检验
#老用户cvr假设检验 # crv_new_ttest=t_test(x_crv_new,y_crv_new)
crv_old_ttest=t_test(x_crv_old,y_crv_old) # #老用户cvr假设检验
# crv_old_ttest=t_test(x_crv_old,y_crv_old)
#新用户ct_cvr假设检验 #
ctcrv_new_ttest=t_test(x_ctcrv_new,y_ctcrv_new) # #新用户ct_cvr假设检验
#老用户ct_cvr假设检验 # ctcrv_new_ttest=t_test(x_ctcrv_new,y_ctcrv_new)
ctcrv_old_ttest=t_test(x_ctcrv_old,y_ctcrv_old) # #老用户ct_cvr假设检验
# ctcrv_old_ttest=t_test(x_ctcrv_old,y_ctcrv_old)
#
#新用户ctr假设检验 #
ctr_new_ttest=t_test(x_ctr_new,y_ctr_new) # #新用户ctr假设检验
#老用户ctr假设检验 # ctr_new_ttest=t_test(x_ctr_new,y_ctr_new)
ctr_old_ttest=t_test(x_ctr_old,y_ctr_old) # #老用户ctr假设检验
# ctr_old_ttest=t_test(x_ctr_old,y_ctr_old)
##新用户ctr(on_click_diary_card)假设检验 #
ctr_new_o_ttest=t_test(x_ctr_new_o,y_ctr_new_o) # ##新用户ctr(on_click_diary_card)假设检验
#老用户ctr(on_click_diary_card)假设检验 # ctr_new_o_ttest=t_test(x_ctr_new_o,y_ctr_new_o)
ctr_old_o_ttest=t_test(x_ctr_old_o,y_ctr_old_o) # #老用户ctr(on_click_diary_card)假设检验
# ctr_old_o_ttest=t_test(x_ctr_old_o,y_ctr_old_o)
#
###############推荐策略不变的情况下数据假设检验############## #
# ###############推荐策略不变的情况下数据假设检验##############
#1 计算每日指标卡方检验 #
# #1 计算每日指标卡方检验
#自动获取5日前的日期 #
def get_fivedate(): # #自动获取5日前的日期
#自动获取10日前的日期,如"2018-07-28" # def get_fivedate():
""" # #自动获取10日前的日期,如"2018-07-28"
:rtype : str # """
""" # :rtype : str
today = datetime.date.today() # """
someday = today - datetime.timedelta(days=5) # today = datetime.date.today()
someday = someday.strftime("%Y-%m-%d") # someday = today - datetime.timedelta(days=5)
return someday # someday = someday.strftime("%Y-%m-%d")
five_days=get_fivedate() # return someday
# five_days=get_fivedate()
#获取最近5天的数据 #
def chi_DATA_recently(x,y,z): # #获取最近5天的数据
sql_cid = "select AVG({0}),AVG({1}) from {2} \ # def chi_DATA_recently(x,y,z):
where stat_date >= five_days and stat_date<yesterday union all select {0},{1} from {2} where stat_date=yesterday}".format(x,y,z) # sql_cid = "select AVG({0}),AVG({1}) from {2} \
CVR_DATA_recently = con_sql(sql_cid)[0][0] # where stat_date >= five_days and stat_date<yesterday union all select {0},{1} from {2} where stat_date=yesterday}".format(x,y,z)
return CVR_DATA_recently # CVR_DATA_recently = con_sql(sql_cid)[0][0]
# return CVR_DATA_recently
# chi_cvr_new= # chi_cvr_new=
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment