Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
ee6d3df4
Commit
ee6d3df4
authored
Feb 20, 2019
by
王志伟
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
数据指标波动假设检验统计
parent
039d19a0
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
49 additions
and
9 deletions
+49
-9
hypothesis_test.py
eda/recommended_indexs/hypothesis_test.py
+49
-9
No files found.
eda/recommended_indexs/hypothesis_test.py
View file @
ee6d3df4
...
@@ -297,39 +297,79 @@ chi_cal(ctcvr_tst_new)
...
@@ -297,39 +297,79 @@ chi_cal(ctcvr_tst_new)
# ############### Data fluctuation magnitude check ##############
# Visual divider printed before the variance/mean report below.
print("============================分割线===================================")
def get_var_data1(x, y, z, t1):
    """Query the row-wise ratio ``x / y`` from table ``z`` starting at ``t1``.

    Args:
        x: Column name (or SQL expression) used as the numerator.
        y: Column name (or SQL expression) used as the denominator.
        z: Name of the table to select from.
        t1: Inclusive lower bound compared against the ``stat_date`` column.

    Returns:
        Whatever ``con_sql`` returns for the generated query.
    """
    # Adjacent string literals replace the backslash continuation that used to
    # sit inside the literal, so the SQL text no longer carries whatever
    # indentation the continued source line happened to have.
    # NOTE(review): arguments are interpolated verbatim into the SQL; pass only
    # trusted identifiers and dates.
    sql_cid = (
        "select {0}/{1} from {2} "
        "where stat_date >= '{3}' "
    ).format(x, y, z, t1)
    return con_sql(sql_cid)
def get_var_data2(x, y, z, q, t1):
    """Query the row-wise ratio ``(x + y) / z`` from table ``q`` starting at ``t1``.

    Args:
        x: First column name (or SQL expression) summed into the numerator.
        y: Second column name (or SQL expression) summed into the numerator.
        z: Column name (or SQL expression) used as the denominator.
        q: Name of the table to select from.
        t1: Inclusive lower bound compared against the ``stat_date`` column.

    Returns:
        Whatever ``con_sql`` returns for the generated query.
    """
    # Adjacent string literals replace the backslash continuation that used to
    # sit inside the literal, so the SQL text no longer carries whatever
    # indentation the continued source line happened to have.
    # NOTE(review): arguments are interpolated verbatim into the SQL; pass only
    # trusted identifiers and dates.
    sql_cid = (
        "select ({0}+{1})/{2} from {3} "
        "where stat_date >= '{4}' "
    ).format(x, y, z, q, t1)
    return con_sql(sql_cid)
def collect_data(data):
    """Return the first field of every row, converted to float and scaled by 100.

    Args:
        data: Iterable of indexable rows; only ``row[0]`` is read.
            Presumably the result set returned by ``con_sql`` -- confirm
            against the callers.

    Returns:
        A list with one ``float(row[0]) * 100`` value per input row, in input
        order. An empty input yields an empty list.

    Raises:
        TypeError, ValueError: Propagated from ``float()`` when a first field
            cannot be converted (for example ``None``).
    """
    # Iterate the rows directly instead of indexing through range(len(data)).
    return [float(row[0]) * 100 for row in data]
# [1] Old users, CT-CVR: ratio diary_meigou_oldUser / diary_exp_oldUser read
# from diary_meigou_crv for rows with stat_date >= five_days (five_days is
# defined outside this section).
var_ctcvr_old_data = get_var_data1(
    "diary_meigou_oldUser",
    "diary_exp_oldUser",
    "diary_meigou_crv",
    five_days,
)
# Scaled series (x100), then its variance and mean.
var_ctcvr_old_D = collect_data(var_ctcvr_old_data)
var_ctcvr_old = np.var(var_ctcvr_old_D)
mean_var_ctcvr_old = np.mean(var_ctcvr_old_D)
print("【1】老用户CT-CVR数据波动5日内方差检验结果:{}".format(var_ctcvr_old))
print("老用户CT-CVR数据波动5日内均值:{}".format(mean_var_ctcvr_old))
# [2] New users, CT-CVR: ratio diary_meigou_newUser / diary_exp_newUser read
# from diary_meigou_crv for rows with stat_date >= five_days (five_days is
# defined outside this section).
var_ctcvr_new_data = get_var_data1(
    "diary_meigou_newUser",
    "diary_exp_newUser",
    "diary_meigou_crv",
    five_days,
)
# Scaled series (x100), then its variance and mean.
var_ctcvr_new_D = collect_data(var_ctcvr_new_data)
var_ctcvr_new = np.var(var_ctcvr_new_D)
mean_var_ctcvr_new = np.mean(var_ctcvr_new_D)
print("【2】新用户CT-CVR数据波动5日内方差检验结果:{}".format(var_ctcvr_new))
print("新用户CT-CVR数据波动5日内均值:{}".format(mean_var_ctcvr_new))
# [3] Old users, CVR: ratio diary_meigou_oldUser / diary_clk_oldUser read
# from diary_meigou_crv for rows with stat_date >= five_days (five_days is
# defined outside this section).
var_cvr_old_data = get_var_data1(
    "diary_meigou_oldUser",
    "diary_clk_oldUser",
    "diary_meigou_crv",
    five_days,
)
# Scaled series (x100), then its variance and mean.
var_cvr_old_D = collect_data(var_cvr_old_data)
var_cvr_old = np.var(var_cvr_old_D)
mean_var_cvr_old = np.mean(var_cvr_old_D)
print("【3】老用户CVR数据波动5日内方差检验结果:{}".format(var_cvr_old))
print("老用户CVR数据波动5日内均值:{}".format(mean_var_cvr_old))
# [4] New users, CVR: ratio diary_meigou_newUser / diary_clk_newUser read
# from diary_meigou_crv for rows with stat_date >= five_days (five_days is
# defined outside this section).
var_cvr_new_data = get_var_data1(
    "diary_meigou_newUser",
    "diary_clk_newUser",
    "diary_meigou_crv",
    five_days,
)
# Scaled series (x100), then its variance and mean.
var_cvr_new_D = collect_data(var_cvr_new_data)
var_cvr_new = np.var(var_cvr_new_D)
mean_var_cvr_new = np.mean(var_cvr_new_D)
print("【4】新用户CVR数据波动5日内方差检验结果:{}".format(var_cvr_new))
print("新用户CVR数据波动5日内均值:{}".format(mean_var_cvr_new))
# [5] Old users, CTR: ratio
# (clk_count_oldUser_all_a + clk_count_oldUser_all_b) / imp_count_oldUser_all
# read from on_click_diary_card for rows with stat_date >= five_days
# (five_days is defined outside this section).
var_ctr_old_data = get_var_data2(
    "clk_count_oldUser_all_a",
    "clk_count_oldUser_all_b",
    "imp_count_oldUser_all",
    "on_click_diary_card",
    five_days,
)
var_ctr_old_D = collect_data(var_ctr_old_data)
# Fix: the variance must be taken over the CTR series built just above; the
# previous code passed var_cvr_old_D (the old-user CVR series), so the value
# printed under the CTR label was actually the CVR variance.
var_ctr_old = np.var(var_ctr_old_D)
mean_var_ctr_old = np.mean(var_ctr_old_D)
print("【5】老用户CTR数据波动5日内方差检验结果:{}".format(var_ctr_old))
print("老用户CTR数据波动5日内均值:{}".format(mean_var_ctr_old))
# [6] New users, CTR: ratio
# (clk_count_newUser_all_a + clk_count_newUser_all_b) / imp_count_newUser_all
# read from on_click_diary_card for rows with stat_date >= five_days
# (five_days is defined outside this section).
var_ctr_new_data = get_var_data2(
    "clk_count_newUser_all_a",
    "clk_count_newUser_all_b",
    "imp_count_newUser_all",
    "on_click_diary_card",
    five_days,
)
# Scaled series (x100), then its variance and mean.
var_ctr_new_D = collect_data(var_ctr_new_data)
var_ctr_new = np.var(var_ctr_new_D)
mean_var_ctr_new = np.mean(var_ctr_new_D)
print("【6】新用户CTR数据波动5日内方差检验结果:{}".format(var_ctr_new))
print("新用户CTR数据波动5日内均值:{}".format(mean_var_ctr_new))
# [7] New users, precise-exposure CTR: ratio
# (clk_count_newUser_all_a + clk_count_newUser_all_b)
# / imp_count_newUser_all_precise
# read from on_click_diary_card for rows with stat_date >= five_days
# (five_days is defined outside this section).
var_ctr_new_precise_data = get_var_data2(
    "clk_count_newUser_all_a",
    "clk_count_newUser_all_b",
    "imp_count_newUser_all_precise",
    "on_click_diary_card",
    five_days,
)
# Scaled series (x100), then its variance and mean.
var_ctr_new_precise_D = collect_data(var_ctr_new_precise_data)
var_ctr_new_precise = np.var(var_ctr_new_precise_D)
mean_var_ctr_new_precise = np.mean(var_ctr_new_precise_D)
print("【7】新用户精准曝光CTR数据波动5日内方差检验结果:{}".format(var_ctr_new_precise))
print("新用户精准曝光CTR数据波动5日内均值:{}".format(mean_var_ctr_new_precise))
# [8] Old users, precise-exposure CTR: ratio
# (clk_count_oldUser_all_a + clk_count_oldUser_all_b)
# / imp_count_oldUser_all_precise
# read from on_click_diary_card for rows with stat_date >= five_days
# (five_days is defined outside this section).
var_ctr_old_precise_data = get_var_data2(
    "clk_count_oldUser_all_a",
    "clk_count_oldUser_all_b",
    "imp_count_oldUser_all_precise",
    "on_click_diary_card",
    five_days,
)
# Scaled series (x100), then its variance and mean.
var_ctr_old_precise_D = collect_data(var_ctr_old_precise_data)
var_ctr_old_precise = np.var(var_ctr_old_precise_D)
mean_var_ctr_old_precise = np.mean(var_ctr_old_precise_D)
print("【8】老用户精准曝光CTR数据波动5日内方差检验结果:{}".format(var_ctr_old_precise))
print("老用户精准曝光CTR数据波动5日内均值:{}".format(mean_var_ctr_old_precise))
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment