Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
e746dd1a
Commit
e746dd1a
authored
Feb 19, 2019
by
王志伟
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
数据指标波动假设检验统计
parent
31afa316
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
123 additions
and
121 deletions
+123
-121
hypothesis_test.py
eda/recommended_indexs/hypothesis_test.py
+123
-121
No files found.
eda/recommended_indexs/hypothesis_test.py
View file @
e746dd1a
...
...
@@ -18,6 +18,7 @@ def get_yesterday_date():
yesterday
=
yesterday
.
strftime
(
"
%
Y-
%
m-
%
d"
)
return
yesterday
# Resolve yesterday's date once at import time. The helper has to be called:
# without the parentheses the name would hold the function object itself
# rather than the formatted "%Y-%m-%d" string it returns.
yesterday = get_yesterday_date()
print(yesterday)
#自动获取10日前的日期
def
get_somedate
():
#自动获取10日前的日期,如"2018-07-28"
...
...
@@ -29,127 +30,128 @@ def get_somedate():
someday
=
someday
.
strftime
(
"
%
Y-
%
m-
%
d"
)
return
someday
# Start date of the "recent" window, as the string returned by get_somedate().
ten_days = get_somedate()


# Fetch a metric ratio for the recent window (rows dated on/after ``ten_days``).
def DATA_recently(x, y, z, q):
    """Query the ratio ``x / y`` from table ``q`` for rows dated on/after ``ten_days``.

    :param x: numerator column name.
    :param y: denominator column name.
    :param z: alias given to the computed ratio column.
    :param q: table name.
    :return: the first column of the first row returned by ``con_sql``.
    """
    # The start date is interpolated as a quoted SQL literal. Writing the bare
    # word ``ten_days`` inside the query text would make the database look for
    # a column of that name instead of using the Python value.
    sql_cid = (
        "select {0}/{1} as {2} from {3} "
        "where stat_date >= '{4}' group by stat_date"
    ).format(x, y, z, q, ten_days)
    # NOTE(review): the query groups by stat_date, so it presumably yields one
    # row per day, yet only the first value is returned here -- confirm whether
    # callers expect the whole per-day series.
    CVR_DATA_recently = con_sql(sql_cid)[0][0]
    return CVR_DATA_recently
#获取固定时间的10天的数据
def DATA_fixed(x, y, z, q, start_date=None, end_date=None):
    """Query the ratio ``x / y`` from table ``q`` for a fixed date window.

    The window is ``start_date <= stat_date < end_date``.

    :param x: numerator column name.
    :param y: denominator column name.
    :param z: alias given to the computed ratio column.
    :param q: table name.
    :param start_date: inclusive lower bound of the window (date string).
    :param end_date: exclusive upper bound of the window (date string).
    :return: the first column of the first row returned by ``con_sql``.
    :raises ValueError: if either bound of the window is missing.
    """
    # Previously the two date slots were empty, which produced the malformed
    # clause ``stat_date >=  and stat_date< group by``. Fail early with a clear
    # message instead of sending a query that cannot be parsed.
    if not start_date or not end_date:
        raise ValueError("DATA_fixed requires both start_date and end_date")
    sql_cid = (
        "select {0}/{1} as {2} from {3} "
        "where stat_date >= '{4}' and stat_date < '{5}' group by stat_date"
    ).format(x, y, z, q, start_date, end_date)
    # NOTE(review): only the first value of the result set is returned even
    # though the query groups by stat_date -- confirm the intended shape.
    CVR_DATA_fixed = con_sql(sql_cid)[0][0]
    return CVR_DATA_fixed
def _recent_and_fixed(numerator, denominator, alias, table):
    """Fetch one metric from the recent window, then from the fixed window."""
    recent = DATA_recently(numerator, denominator, alias, table)
    fixed = DATA_fixed(numerator, denominator, alias, table)
    return recent, fixed


# CVR, new users
x_crv_new, y_crv_new = _recent_and_fixed(
    "diary_meigou_newUser", "diary_clk_newUser", "CVR_new", "diary_meigou_crv"
)

# CVR, old users
x_crv_old, y_crv_old = _recent_and_fixed(
    "diary_meigou_oldUser", "diary_clk_oldUser", "CVR_old", "diary_meigou_crv"
)

# CT-CVR, new users
x_ctcrv_new, y_ctcrv_new = _recent_and_fixed(
    "diary_meigou_newUser", "diary_exp_newUser", "CT_CVR_new", "diary_meigou_crv"
)

# CT-CVR, old users
x_ctcrv_old, y_ctcrv_old = _recent_and_fixed(
    "diary_meigou_oldUser", "diary_exp_oldUser", "CT_CVR_old", "diary_meigou_crv"
)

# CTR (page_view), new users
x_ctr_new, y_ctr_new = _recent_and_fixed(
    "clk_count_newUser_all",
    "imp_count_newUser_all",
    "ctr_new",
    "bug_Recommendation_strategy_newUser",
)

# CTR (page_view), old users
x_ctr_old, y_ctr_old = _recent_and_fixed(
    "clk_count_oldUser_all",
    "imp_count_oldUser_all",
    "ctr_old",
    "bug_Recommendation_strategy_temp",
)

# CTR (on_click_diary_card), new users
x_ctr_new_o, y_ctr_new_o = _recent_and_fixed(
    "clk_count_newUser_all_a", "imp_count_newUser_all", "ctr_new", "on_click_diary_card"
)

# CTR (on_click_diary_card), old users
x_ctr_old_o, y_ctr_old_o = _recent_and_fixed(
    "clk_count_oldUser_all_a", "imp_count_oldUser_all", "ctr_old", "on_click_diary_card"
)
def t_test(x, y):
    """Run a two-sample t-test on ``x`` and ``y`` and print the verdict.

    Levene's test is run first; its p-value decides whether the t-test
    assumes equal variances (p > 0.05) or not.

    :param x: first sample (per the original notes, data before the strategy change).
    :param y: second sample (per the original notes, data after the strategy change).
    :return: the p-value of the t-test.
    """
    # Use the samples supplied by the caller. Overwriting them with fixed demo
    # lists would make every call report the same result regardless of input.
    levene_p_value = levene(x, y)[1]
    # p > 0.05: treat the two variances as equal; otherwise as unequal.
    variances_equal = bool(levene_p_value > 0.05)
    t_p_value = ttest_ind(x, y, equal_var=variances_equal)[1]

    if t_p_value > 0.05:
        print("策略前后两组数据无显著性差异,即该指标没有显著提升,p_value:%f" % t_p_value)
    else:
        print("策略前后两组数据有显著性差异,即该指标获得显著提升,p_value:%f" % t_p_value)
    # Callers store the result, so hand back the p-value instead of None.
    return t_p_value
# --- Hypothesis tests: check each metric for a significant difference ---

# CVR: new users, then old users
crv_new_ttest = t_test(x_crv_new, y_crv_new)
crv_old_ttest = t_test(x_crv_old, y_crv_old)

# CT-CVR: new users, then old users
ctcrv_new_ttest = t_test(x_ctcrv_new, y_ctcrv_new)
ctcrv_old_ttest = t_test(x_ctcrv_old, y_ctcrv_old)

# CTR: new users, then old users
ctr_new_ttest = t_test(x_ctr_new, y_ctr_new)
ctr_old_ttest = t_test(x_ctr_old, y_ctr_old)

# CTR (on_click_diary_card): new users, then old users
ctr_new_o_ttest = t_test(x_ctr_new_o, y_ctr_new_o)
ctr_old_o_ttest = t_test(x_ctr_old_o, y_ctr_old_o)
###############推荐策略不变的情况下数据假设检验##############
#1 计算每日指标卡方检验
#自动获取5日前的日期
def get_fivedate():
    """Return the date five days before today, formatted like "2018-07-28".

    :rtype: str
    """
    five_days_ago = datetime.date.today() - datetime.timedelta(days=5)
    return five_days_ago.strftime("%Y-%m-%d")


# Computed once at import time.
five_days = get_fivedate()
#获取最近5天的数据
def chi_DATA_recently(x, y, z):
    """Query the recent averages of two columns together with yesterday's values.

    The SQL is a ``union all`` of two selects on table ``z``: the averages of
    ``x`` and ``y`` over ``five_days <= stat_date < yesterday``, followed by
    the raw ``x`` and ``y`` for ``stat_date = yesterday``.

    :param x: first column name.
    :param y: second column name.
    :param z: table name.
    :return: the first column of the first row returned by ``con_sql``.
    """
    # Resolve yesterday's date at call time so a real date string is always
    # interpolated into the query.
    yesterday_str = get_yesterday_date()
    # Both dates go in as quoted SQL literals. The template must not contain a
    # lone "}", which str.format rejects with ValueError.
    sql_cid = (
        "select AVG({0}),AVG({1}) from {2} "
        "where stat_date >= '{3}' and stat_date < '{4}' "
        "union all select {0},{1} from {2} where stat_date = '{4}'"
    ).format(x, y, z, five_days, yesterday_str)
    # NOTE(review): the query yields two rows of two columns, but only the
    # first cell is returned -- confirm what the chi-square step needs.
    CVR_DATA_recently = con_sql(sql_cid)[0][0]
    return CVR_DATA_recently


print(ten_days)
# #获取最近10天的数据
# def DATA_recently(x,y,z,q):
# sql_cid = "select {0}/{1} as {2} from {3} \
# where stat_date >= ten_days group by stat_date".format(x,y,z,q)
# CVR_DATA_recently = con_sql(sql_cid)[0][0]
# return CVR_DATA_recently
#
# #获取固定时间的10天的数据
# def DATA_fixed(x,y,z,q):
# sql_cid = "select {0}/{1} as {2} from {3} \
# where stat_date >= " " and stat_date<" " group by stat_date".format(x,y,z,q)
# CVR_DATA_fixed = con_sql(sql_cid)[0][0]
# return CVR_DATA_fixed
#
# #新用户cvr
# x_crv_new=DATA_recently("diary_meigou_newUser","diary_clk_newUser","CVR_new","diary_meigou_crv")
# y_crv_new=DATA_fixed("diary_meigou_newUser","diary_clk_newUser","CVR_new","diary_meigou_crv")
#
# #老用户cvr
# x_crv_old=DATA_recently("diary_meigou_oldUser","diary_clk_oldUser","CVR_old","diary_meigou_crv")
# y_crv_old=DATA_fixed("diary_meigou_oldUser","diary_clk_oldUser","CVR_old","diary_meigou_crv")
#
# #新用户ct-cvr
# x_ctcrv_new=DATA_recently("diary_meigou_newUser","diary_exp_newUser","CT_CVR_new","diary_meigou_crv")
# y_ctcrv_new=DATA_fixed("diary_meigou_newUser","diary_exp_newUser","CT_CVR_new","diary_meigou_crv")
#
# #老用户ct-cvr
# x_ctcrv_old=DATA_recently("diary_meigou_oldUser","diary_exp_oldUser","CT_CVR_old","diary_meigou_crv")
# y_ctcrv_old=DATA_fixed("diary_meigou_oldUser","diary_exp_oldUser","CT_CVR_old","diary_meigou_crv")
#
# #新用户ctr(page_view)
# x_ctr_new=DATA_recently("clk_count_newUser_all","imp_count_newUser_all","ctr_new","bug_Recommendation_strategy_newUser")
# y_ctr_new=DATA_fixed("clk_count_newUser_all","imp_count_newUser_all","ctr_new","bug_Recommendation_strategy_newUser")
#
# #老用户ctr(page_view)
# x_ctr_old=DATA_recently("clk_count_oldUser_all","imp_count_oldUser_all","ctr_old","bug_Recommendation_strategy_temp")
# y_ctr_old=DATA_fixed("clk_count_oldUser_all","imp_count_oldUser_all","ctr_old","bug_Recommendation_strategy_temp")
#
# #新用户ctr(on_click_diary_card)
# x_ctr_new_o=DATA_recently("clk_count_newUser_all_a","imp_count_newUser_all","ctr_new","on_click_diary_card")
# y_ctr_new_o=DATA_fixed("clk_count_newUser_all_a","imp_count_newUser_all","ctr_new","on_click_diary_card")
#
# #老用户ctr(on_click_diary_card)
# x_ctr_old_o=DATA_recently("clk_count_oldUser_all_a","imp_count_oldUser_all","ctr_old","on_click_diary_card")
# y_ctr_old_o=DATA_fixed("clk_count_oldUser_all_a","imp_count_oldUser_all","ctr_old","on_click_diary_card")
#
#
#
#
# def t_test(x,y): #进行t检验
#
# #策略前的数据,赋值给x,策略后的数据赋值给y,均采用10日内数据
# x=[2,4,2,3,4,2,3]
# y=[4,5,6,3,4,5,6]
#
# #检验数据方差是否齐性
# a=levene(x,y)
# p_value=a[1] #结果若p_value>0.05,则认为两组数据方差是相等的,否则两组数据方差是不等的
#
# if p_value>0.05: #认为数据方差具有齐性,equal_var=ture
# t_test=ttest_ind(x,y,equal_var=True)
# t_p_value=t_test[1]
# if t_p_value>0.05:
# print("策略前后两组数据无显著性差异,即该指标没有显著提升,p_value:%f" % t_p_value)
# else:
# print("策略前后两组数据有显著性差异,即该指标获得显著提升,p_value:%f" % t_p_value)
# else: #认为数据方差不具有齐性,equal_var=false
# t_test = ttest_ind(x, y, equal_var=False)
# t_p_value = t_test[1]
# if t_p_value > 0.05:
# print("策略前后两组数据无显著性差异,即该指标没有显著提升,p_value:%f" % t_p_value)
# else:
# print("策略前后两组数据有显著性差异,即该指标获得显著提升,p_value:%f" % t_p_value)
#
# ###假设检验,判断是否具有显著性
#
# #新用户cvr假设检验
# crv_new_ttest=t_test(x_crv_new,y_crv_new)
# #老用户cvr假设检验
# crv_old_ttest=t_test(x_crv_old,y_crv_old)
#
# #新用户ct_cvr假设检验
# ctcrv_new_ttest=t_test(x_ctcrv_new,y_ctcrv_new)
# #老用户ct_cvr假设检验
# ctcrv_old_ttest=t_test(x_ctcrv_old,y_ctcrv_old)
#
#
# #新用户ctr假设检验
# ctr_new_ttest=t_test(x_ctr_new,y_ctr_new)
# #老用户ctr假设检验
# ctr_old_ttest=t_test(x_ctr_old,y_ctr_old)
#
# ##新用户ctr(on_click_diary_card)假设检验
# ctr_new_o_ttest=t_test(x_ctr_new_o,y_ctr_new_o)
# #老用户ctr(on_click_diary_card)假设检验
# ctr_old_o_ttest=t_test(x_ctr_old_o,y_ctr_old_o)
#
#
# ###############推荐策略不变的情况下数据假设检验##############
#
# #1 计算每日指标卡方检验
#
# #自动获取5日前的日期
# def get_fivedate():
# #自动获取10日前的日期,如"2018-07-28"
# """
# :rtype : str
# """
# today = datetime.date.today()
# someday = today - datetime.timedelta(days=5)
# someday = someday.strftime("%Y-%m-%d")
# return someday
# five_days=get_fivedate()
#
# #获取最近5天的数据
# def chi_DATA_recently(x,y,z):
# sql_cid = "select AVG({0}),AVG({1}) from {2} \
# where stat_date >= five_days and stat_date<yesterday union all select {0},{1} from {2} where stat_date=yesterday}".format(x,y,z)
# CVR_DATA_recently = con_sql(sql_cid)[0][0]
# return CVR_DATA_recently
# chi_cvr_new=
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment