Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
ebd1d550
Commit
ebd1d550
authored
Feb 21, 2019
by
王志伟
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
数据指标波动假设检验统计
parent
fbbd84f7
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
23 additions
and
23 deletions
+23
-23
hypothesis_test.py
eda/recommended_indexs/hypothesis_test.py
+23
-23
No files found.
eda/recommended_indexs/hypothesis_test.py
View file @
ebd1d550
...
@@ -38,7 +38,7 @@ def get_somedate():
...
@@ -38,7 +38,7 @@ def get_somedate():
return
someday
return
someday
ten_days
=
get_somedate
()
ten_days
=
get_somedate
()
# print(ten_days)
# print(ten_days)
print
(
"===========分割线,T检验最近10日指标与策略前10日指标是否获得显著提升============"
)
#获取最近10天的数据
#获取最近10天的数据
def
DATA_recently
(
x
,
y
,
z
,
q
,
t
):
def
DATA_recently
(
x
,
y
,
z
,
q
,
t
):
ten_days
=
get_somedate
()
ten_days
=
get_somedate
()
...
@@ -117,20 +117,20 @@ def t_test(x,y): #进行t检验
...
@@ -117,20 +117,20 @@ def t_test(x,y): #进行t检验
t_p_value
=
t_test
[
1
]
t_p_value
=
t_test
[
1
]
# print(t_p_value)
# print(t_p_value)
if
t_p_value
>
0.05
:
if
t_p_value
>
0.05
:
print
(
"策略前后两组数据无显著性差异,即该指标没有显著提升,p_value:{}"
.
format
(
t_p_value
))
print
(
"
95
%
置信度认为
策略前后两组数据无显著性差异,即该指标没有显著提升,p_value:{}"
.
format
(
t_p_value
))
print
(
"
\n
"
)
print
(
"
\n
"
)
else
:
else
:
print
(
"策略前后两组数据有显著性差异,即该指标获得显著提升,p_value:{}"
.
format
(
t_p_value
))
print
(
"
95
%
置信度认为
策略前后两组数据有显著性差异,即该指标获得显著提升,p_value:{}"
.
format
(
t_p_value
))
print
(
"
\n
"
)
print
(
"
\n
"
)
else
:
#认为数据方差不具有齐性,equal_var=false
else
:
#认为数据方差不具有齐性,equal_var=false
t_test
=
ttest_ind
(
x
,
y
,
equal_var
=
False
)
t_test
=
ttest_ind
(
x
,
y
,
equal_var
=
False
)
t_p_value
=
t_test
[
1
]
t_p_value
=
t_test
[
1
]
# print(t_p_value)
# print(t_p_value)
if
t_p_value
>
0.05
:
if
t_p_value
>
0.05
:
print
(
"策略前后两组数据无显著性差异,即该指标没有显著提升,p_value:{}"
.
format
(
t_p_value
))
print
(
"
95
%
置信度认为
策略前后两组数据无显著性差异,即该指标没有显著提升,p_value:{}"
.
format
(
t_p_value
))
print
(
"
\n
"
)
print
(
"
\n
"
)
else
:
else
:
print
(
"策略前后两组数据有显著性差异,即该指标获得显著提升,p_value:{}"
.
format
(
t_p_value
))
print
(
"
95
%
置信度认为
策略前后两组数据有显著性差异,即该指标获得显著提升,p_value:{}"
.
format
(
t_p_value
))
print
(
"
\n
"
)
print
(
"
\n
"
)
#
#
# ###假设检验,判断是否具有显著性
# ###假设检验,判断是否具有显著性
...
@@ -165,7 +165,7 @@ print("【8】老用户CTR假设检验(on_click_diary_card)结果:")
...
@@ -165,7 +165,7 @@ print("【8】老用户CTR假设检验(on_click_diary_card)结果:")
ctr_old_o_ttest
=
t_test
(
x_ctr_old_o
,
y_ctr_old_o
)
ctr_old_o_ttest
=
t_test
(
x_ctr_old_o
,
y_ctr_old_o
)
#
#
# ###############推荐策略不变的情况下数据假设检验##############
# ###############推荐策略不变的情况下数据假设检验##############
print
(
"===========
=================分割线=======================
============"
)
print
(
"===========
分割线,卡方检验昨日指标与前5日指标均值是否显著变化
============"
)
# #1 计算每日指标卡方检验
# #1 计算每日指标卡方检验
#
#
# #自动获取5日前的日期
# #自动获取5日前的日期
...
@@ -306,7 +306,7 @@ ctcvr_tst_new=data_cal(ctcvr_new,ctcvr_new2)
...
@@ -306,7 +306,7 @@ ctcvr_tst_new=data_cal(ctcvr_new,ctcvr_new2)
chi_cal
(
ctcvr_tst_new
)
chi_cal
(
ctcvr_tst_new
)
# ###############数据波动大小检验##############
# ###############数据波动大小检验##############
print
(
"============================分割线==================================="
)
print
(
"============================分割线
,开始检测各个指标的5日内的方差和均值
==================================="
)
def
get_var_data1
(
x
,
y
,
z
,
t1
):
def
get_var_data1
(
x
,
y
,
z
,
t1
):
sql_cid
=
"select {0}/{1} from {2}
\
sql_cid
=
"select {0}/{1} from {2}
\
...
@@ -328,8 +328,8 @@ var_ctcvr_old_data=get_var_data1("diary_meigou_oldUser","diary_exp_oldUser","dia
...
@@ -328,8 +328,8 @@ var_ctcvr_old_data=get_var_data1("diary_meigou_oldUser","diary_exp_oldUser","dia
var_ctcvr_old_D
=
collect_data
(
var_ctcvr_old_data
)
var_ctcvr_old_D
=
collect_data
(
var_ctcvr_old_data
)
var_ctcvr_old
=
np
.
var
(
var_ctcvr_old_D
)
var_ctcvr_old
=
np
.
var
(
var_ctcvr_old_D
)
mean_var_ctcvr_old
=
np
.
mean
(
var_ctcvr_old_D
)
mean_var_ctcvr_old
=
np
.
mean
(
var_ctcvr_old_D
)
print
(
"【1】老用户CT-CVR数据波动5日内方差检验结果:{}"
.
format
(
var_ctcvr_old
))
print
(
"【1
-1
】老用户CT-CVR数据波动5日内方差检验结果:{}"
.
format
(
var_ctcvr_old
))
print
(
"老用户CT-CVR数据波动5日内均值:{}
%
"
.
format
(
mean_var_ctcvr_old
))
print
(
"
【1-2】
老用户CT-CVR数据波动5日内均值:{}
%
"
.
format
(
mean_var_ctcvr_old
))
print
(
"
\n
"
)
print
(
"
\n
"
)
...
@@ -337,16 +337,16 @@ var_ctcvr_new_data=get_var_data1("diary_meigou_newUser","diary_exp_newUser","dia
...
@@ -337,16 +337,16 @@ var_ctcvr_new_data=get_var_data1("diary_meigou_newUser","diary_exp_newUser","dia
var_ctcvr_new_D
=
collect_data
(
var_ctcvr_new_data
)
var_ctcvr_new_D
=
collect_data
(
var_ctcvr_new_data
)
var_ctcvr_new
=
np
.
var
(
var_ctcvr_new_D
)
var_ctcvr_new
=
np
.
var
(
var_ctcvr_new_D
)
mean_var_ctcvr_new
=
np
.
mean
(
var_ctcvr_new_D
)
mean_var_ctcvr_new
=
np
.
mean
(
var_ctcvr_new_D
)
print
(
"【2】新用户CT-CVR数据波动5日内方差检验结果:{}"
.
format
(
var_ctcvr_new
))
print
(
"【2
-1
】新用户CT-CVR数据波动5日内方差检验结果:{}"
.
format
(
var_ctcvr_new
))
print
(
"新用户CT-CVR数据波动5日内均值:{}
%
"
.
format
(
mean_var_ctcvr_new
))
print
(
"
【2-2】
新用户CT-CVR数据波动5日内均值:{}
%
"
.
format
(
mean_var_ctcvr_new
))
print
(
"
\n
"
)
print
(
"
\n
"
)
var_cvr_old_data
=
get_var_data1
(
"diary_meigou_oldUser"
,
"diary_clk_oldUser"
,
"diary_meigou_crv"
,
five_days
)
var_cvr_old_data
=
get_var_data1
(
"diary_meigou_oldUser"
,
"diary_clk_oldUser"
,
"diary_meigou_crv"
,
five_days
)
var_cvr_old_D
=
collect_data
(
var_cvr_old_data
)
var_cvr_old_D
=
collect_data
(
var_cvr_old_data
)
var_cvr_old
=
np
.
var
(
var_cvr_old_D
)
var_cvr_old
=
np
.
var
(
var_cvr_old_D
)
mean_var_cvr_old
=
np
.
mean
(
var_cvr_old_D
)
mean_var_cvr_old
=
np
.
mean
(
var_cvr_old_D
)
print
(
"【3】老用户CVR数据波动5日内方差检验结果:{}"
.
format
(
var_cvr_old
))
print
(
"【3
-1
】老用户CVR数据波动5日内方差检验结果:{}"
.
format
(
var_cvr_old
))
print
(
"老用户CVR数据波动5日内均值:{}
%
"
.
format
(
mean_var_cvr_old
))
print
(
"
【3-2】
老用户CVR数据波动5日内均值:{}
%
"
.
format
(
mean_var_cvr_old
))
print
(
"
\n
"
)
print
(
"
\n
"
)
#
#
...
@@ -354,40 +354,40 @@ var_cvr_new_data=get_var_data1("diary_meigou_newUser","diary_clk_newUser","diary
...
@@ -354,40 +354,40 @@ var_cvr_new_data=get_var_data1("diary_meigou_newUser","diary_clk_newUser","diary
var_cvr_new_D
=
collect_data
(
var_cvr_new_data
)
var_cvr_new_D
=
collect_data
(
var_cvr_new_data
)
var_cvr_new
=
np
.
var
(
var_cvr_new_D
)
var_cvr_new
=
np
.
var
(
var_cvr_new_D
)
mean_var_cvr_new
=
np
.
mean
(
var_cvr_new_D
)
mean_var_cvr_new
=
np
.
mean
(
var_cvr_new_D
)
print
(
"【4】新用户CVR数据波动5日内方差检验结果:{}"
.
format
(
var_cvr_new
))
print
(
"【4
-1
】新用户CVR数据波动5日内方差检验结果:{}"
.
format
(
var_cvr_new
))
print
(
"新用户CVR数据波动5日内均值:{}
%
"
.
format
(
mean_var_cvr_new
))
print
(
"
【4-2】
新用户CVR数据波动5日内均值:{}
%
"
.
format
(
mean_var_cvr_new
))
print
(
"
\n
"
)
print
(
"
\n
"
)
var_ctr_old_data
=
get_var_data2
(
"clk_count_oldUser_all_a"
,
"clk_count_oldUser_all_b"
,
"imp_count_oldUser_all"
,
"on_click_diary_card"
,
five_days
)
var_ctr_old_data
=
get_var_data2
(
"clk_count_oldUser_all_a"
,
"clk_count_oldUser_all_b"
,
"imp_count_oldUser_all"
,
"on_click_diary_card"
,
five_days
)
var_ctr_old_D
=
collect_data
(
var_ctr_old_data
)
var_ctr_old_D
=
collect_data
(
var_ctr_old_data
)
var_ctr_old
=
np
.
var
(
var_cvr_old_D
)
var_ctr_old
=
np
.
var
(
var_cvr_old_D
)
mean_var_ctr_old
=
np
.
mean
(
var_ctr_old_D
)
mean_var_ctr_old
=
np
.
mean
(
var_ctr_old_D
)
print
(
"【5】老用户CTR数据波动5日内方差检验结果:{}"
.
format
(
var_ctr_old
))
print
(
"【5
-1
】老用户CTR数据波动5日内方差检验结果:{}"
.
format
(
var_ctr_old
))
print
(
"老用户CTR数据波动5日内均值:{}
%
"
.
format
(
mean_var_ctr_old
))
print
(
"
【5-2】
老用户CTR数据波动5日内均值:{}
%
"
.
format
(
mean_var_ctr_old
))
print
(
"
\n
"
)
print
(
"
\n
"
)
var_ctr_new_data
=
get_var_data2
(
"clk_count_newUser_all_a"
,
"clk_count_newUser_all_b"
,
"imp_count_newUser_all"
,
"on_click_diary_card"
,
five_days
)
var_ctr_new_data
=
get_var_data2
(
"clk_count_newUser_all_a"
,
"clk_count_newUser_all_b"
,
"imp_count_newUser_all"
,
"on_click_diary_card"
,
five_days
)
var_ctr_new_D
=
collect_data
(
var_ctr_new_data
)
var_ctr_new_D
=
collect_data
(
var_ctr_new_data
)
var_ctr_new
=
np
.
var
(
var_ctr_new_D
)
var_ctr_new
=
np
.
var
(
var_ctr_new_D
)
mean_var_ctr_new
=
np
.
mean
(
var_ctr_new_D
)
mean_var_ctr_new
=
np
.
mean
(
var_ctr_new_D
)
print
(
"【6】新用户CTR数据波动5日内方差检验结果:{}"
.
format
(
var_ctr_new
))
print
(
"【6
-1
】新用户CTR数据波动5日内方差检验结果:{}"
.
format
(
var_ctr_new
))
print
(
"新用户CTR数据波动5日内均值:{}
%
"
.
format
(
mean_var_ctr_new
))
print
(
"
【6-2】
新用户CTR数据波动5日内均值:{}
%
"
.
format
(
mean_var_ctr_new
))
print
(
"
\n
"
)
print
(
"
\n
"
)
var_ctr_new_precise_data
=
get_var_data2
(
"clk_count_newUser_all_a"
,
"clk_count_newUser_all_b"
,
"imp_count_newUser_all_precise"
,
"on_click_diary_card"
,
five_days
)
var_ctr_new_precise_data
=
get_var_data2
(
"clk_count_newUser_all_a"
,
"clk_count_newUser_all_b"
,
"imp_count_newUser_all_precise"
,
"on_click_diary_card"
,
five_days
)
var_ctr_new_precise_D
=
collect_data
(
var_ctr_new_precise_data
)
var_ctr_new_precise_D
=
collect_data
(
var_ctr_new_precise_data
)
var_ctr_new_precise
=
np
.
var
(
var_ctr_new_precise_D
)
var_ctr_new_precise
=
np
.
var
(
var_ctr_new_precise_D
)
mean_var_ctr_new_precise
=
np
.
mean
(
var_ctr_new_precise_D
)
mean_var_ctr_new_precise
=
np
.
mean
(
var_ctr_new_precise_D
)
print
(
"【7】新用户精准曝光CTR数据波动5日内方差检验结果:{}"
.
format
(
var_ctr_new_precise
))
print
(
"【7
-1
】新用户精准曝光CTR数据波动5日内方差检验结果:{}"
.
format
(
var_ctr_new_precise
))
print
(
"新用户精准曝光CTR数据波动5日内均值:{}
%
"
.
format
(
mean_var_ctr_new_precise
))
print
(
"
【7-2】
新用户精准曝光CTR数据波动5日内均值:{}
%
"
.
format
(
mean_var_ctr_new_precise
))
print
(
"
\n
"
)
print
(
"
\n
"
)
var_ctr_old_precise_data
=
get_var_data2
(
"clk_count_oldUser_all_a"
,
"clk_count_oldUser_all_b"
,
"imp_count_oldUser_all_precise"
,
"on_click_diary_card"
,
five_days
)
var_ctr_old_precise_data
=
get_var_data2
(
"clk_count_oldUser_all_a"
,
"clk_count_oldUser_all_b"
,
"imp_count_oldUser_all_precise"
,
"on_click_diary_card"
,
five_days
)
var_ctr_old_precise_D
=
collect_data
(
var_ctr_old_precise_data
)
var_ctr_old_precise_D
=
collect_data
(
var_ctr_old_precise_data
)
var_ctr_old_precise
=
np
.
var
(
var_ctr_old_precise_D
)
var_ctr_old_precise
=
np
.
var
(
var_ctr_old_precise_D
)
mean_var_ctr_old_precise
=
np
.
mean
(
var_ctr_old_precise_D
)
mean_var_ctr_old_precise
=
np
.
mean
(
var_ctr_old_precise_D
)
print
(
"【8】老用户精准曝光CTR数据波动5日内方差检验结果:{}"
.
format
(
var_ctr_old_precise
))
print
(
"【8
-1
】老用户精准曝光CTR数据波动5日内方差检验结果:{}"
.
format
(
var_ctr_old_precise
))
print
(
"老用户精准曝光CTR数据波动5日内均值:{}
%
"
.
format
(
mean_var_ctr_old_precise
))
print
(
"
【8-2】
老用户精准曝光CTR数据波动5日内均值:{}
%
"
.
format
(
mean_var_ctr_old_precise
))
print
(
"
\n
"
)
print
(
"
\n
"
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment