Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
fbbd84f7
Commit
fbbd84f7
authored
Feb 21, 2019
by
王志伟
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
数据指标波动假设检验统计
parent
5cf5f5de
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
44 additions
and
25 deletions
+44
-25
hypothesis_test.py
eda/recommended_indexs/hypothesis_test.py
+44
-25
No files found.
eda/recommended_indexs/hypothesis_test.py
View file @
fbbd84f7
...
...
@@ -10,6 +10,8 @@ import smtplib
from
email.mime.text
import
MIMEText
from
email.utils
import
formataddr
f
=
open
(
'/srv/apps/ffm-baseline/eda/recommended_indexs/test.txt'
,
'w'
)
#########推荐策略前后统计指标假设检验(t检验)###############
#自动获取昨日日期
...
...
@@ -23,7 +25,7 @@ def get_yesterday_date():
yesterday
=
yesterday
.
strftime
(
"
%
Y-
%
m-
%
d"
)
return
yesterday
yesterday
=
get_yesterday_date
()
print
(
yesterday
)
print
(
"监测数据日期:{}"
.
format
(
yesterday
)
)
#自动获取10日前的日期
def
get_somedate
():
#自动获取10日前的日期,如"2018-07-28"
...
...
@@ -119,6 +121,7 @@ def t_test(x,y): #进行t检验
print
(
"
\n
"
)
else
:
print
(
"策略前后两组数据有显著性差异,即该指标获得显著提升,p_value:{}"
.
format
(
t_p_value
))
print
(
"
\n
"
)
else
:
#认为数据方差不具有齐性,equal_var=false
t_test
=
ttest_ind
(
x
,
y
,
equal_var
=
False
)
t_p_value
=
t_test
[
1
]
...
...
@@ -128,6 +131,7 @@ def t_test(x,y): #进行t检验
print
(
"
\n
"
)
else
:
print
(
"策略前后两组数据有显著性差异,即该指标获得显著提升,p_value:{}"
.
format
(
t_p_value
))
print
(
"
\n
"
)
#
# ###假设检验,判断是否具有显著性
#
...
...
@@ -233,8 +237,10 @@ def chi_cal(data):
#查表发现阈值为3.84
if
X
>
3.84
:
print
(
"数据波动较大,超出正常波动范围,95
%
可能性属于指标显著变化,请关注"
)
print
(
"
\n
"
)
else
:
print
(
"数据波动较小,95
%
可能性属于正常波动范围"
)
print
(
"
\n
"
)
#老用户精准点击曝光数据(首页精选日记本列表on_click_diary_card)
print
(
"【1】(精准曝光)首页精选日记本列表老用户ctr数据波动假设检验结果:"
)
...
...
@@ -324,6 +330,7 @@ var_ctcvr_old=np.var(var_ctcvr_old_D)
mean_var_ctcvr_old
=
np
.
mean
(
var_ctcvr_old_D
)
print
(
"【1】老用户CT-CVR数据波动5日内方差检验结果:{}"
.
format
(
var_ctcvr_old
))
print
(
"老用户CT-CVR数据波动5日内均值:{}
%
"
.
format
(
mean_var_ctcvr_old
))
print
(
"
\n
"
)
var_ctcvr_new_data
=
get_var_data1
(
"diary_meigou_newUser"
,
"diary_exp_newUser"
,
"diary_meigou_crv"
,
five_days
)
...
...
@@ -332,6 +339,7 @@ var_ctcvr_new=np.var(var_ctcvr_new_D)
mean_var_ctcvr_new
=
np
.
mean
(
var_ctcvr_new_D
)
print
(
"【2】新用户CT-CVR数据波动5日内方差检验结果:{}"
.
format
(
var_ctcvr_new
))
print
(
"新用户CT-CVR数据波动5日内均值:{}
%
"
.
format
(
mean_var_ctcvr_new
))
print
(
"
\n
"
)
var_cvr_old_data
=
get_var_data1
(
"diary_meigou_oldUser"
,
"diary_clk_oldUser"
,
"diary_meigou_crv"
,
five_days
)
var_cvr_old_D
=
collect_data
(
var_cvr_old_data
)
...
...
@@ -339,6 +347,7 @@ var_cvr_old=np.var(var_cvr_old_D)
mean_var_cvr_old
=
np
.
mean
(
var_cvr_old_D
)
print
(
"【3】老用户CVR数据波动5日内方差检验结果:{}"
.
format
(
var_cvr_old
))
print
(
"老用户CVR数据波动5日内均值:{}
%
"
.
format
(
mean_var_cvr_old
))
print
(
"
\n
"
)
#
var_cvr_new_data
=
get_var_data1
(
"diary_meigou_newUser"
,
"diary_clk_newUser"
,
"diary_meigou_crv"
,
five_days
)
...
...
@@ -347,6 +356,7 @@ var_cvr_new=np.var(var_cvr_new_D)
mean_var_cvr_new
=
np
.
mean
(
var_cvr_new_D
)
print
(
"【4】新用户CVR数据波动5日内方差检验结果:{}"
.
format
(
var_cvr_new
))
print
(
"新用户CVR数据波动5日内均值:{}
%
"
.
format
(
mean_var_cvr_new
))
print
(
"
\n
"
)
var_ctr_old_data
=
get_var_data2
(
"clk_count_oldUser_all_a"
,
"clk_count_oldUser_all_b"
,
"imp_count_oldUser_all"
,
"on_click_diary_card"
,
five_days
)
var_ctr_old_D
=
collect_data
(
var_ctr_old_data
)
...
...
@@ -354,6 +364,7 @@ var_ctr_old=np.var(var_cvr_old_D)
mean_var_ctr_old
=
np
.
mean
(
var_ctr_old_D
)
print
(
"【5】老用户CTR数据波动5日内方差检验结果:{}"
.
format
(
var_ctr_old
))
print
(
"老用户CTR数据波动5日内均值:{}
%
"
.
format
(
mean_var_ctr_old
))
print
(
"
\n
"
)
var_ctr_new_data
=
get_var_data2
(
"clk_count_newUser_all_a"
,
"clk_count_newUser_all_b"
,
"imp_count_newUser_all"
,
"on_click_diary_card"
,
five_days
)
var_ctr_new_D
=
collect_data
(
var_ctr_new_data
)
...
...
@@ -361,6 +372,7 @@ var_ctr_new=np.var(var_ctr_new_D)
mean_var_ctr_new
=
np
.
mean
(
var_ctr_new_D
)
print
(
"【6】新用户CTR数据波动5日内方差检验结果:{}"
.
format
(
var_ctr_new
))
print
(
"新用户CTR数据波动5日内均值:{}
%
"
.
format
(
mean_var_ctr_new
))
print
(
"
\n
"
)
var_ctr_new_precise_data
=
get_var_data2
(
"clk_count_newUser_all_a"
,
"clk_count_newUser_all_b"
,
"imp_count_newUser_all_precise"
,
"on_click_diary_card"
,
five_days
)
var_ctr_new_precise_D
=
collect_data
(
var_ctr_new_precise_data
)
...
...
@@ -368,6 +380,7 @@ var_ctr_new_precise=np.var(var_ctr_new_precise_D)
mean_var_ctr_new_precise
=
np
.
mean
(
var_ctr_new_precise_D
)
print
(
"【7】新用户精准曝光CTR数据波动5日内方差检验结果:{}"
.
format
(
var_ctr_new_precise
))
print
(
"新用户精准曝光CTR数据波动5日内均值:{}
%
"
.
format
(
mean_var_ctr_new_precise
))
print
(
"
\n
"
)
var_ctr_old_precise_data
=
get_var_data2
(
"clk_count_oldUser_all_a"
,
"clk_count_oldUser_all_b"
,
"imp_count_oldUser_all_precise"
,
"on_click_diary_card"
,
five_days
)
var_ctr_old_precise_D
=
collect_data
(
var_ctr_old_precise_data
)
...
...
@@ -375,38 +388,44 @@ var_ctr_old_precise=np.var(var_ctr_old_precise_D)
mean_var_ctr_old_precise
=
np
.
mean
(
var_ctr_old_precise_D
)
print
(
"【8】老用户精准曝光CTR数据波动5日内方差检验结果:{}"
.
format
(
var_ctr_old_precise
))
print
(
"老用户精准曝光CTR数据波动5日内均值:{}
%
"
.
format
(
mean_var_ctr_old_precise
))
print
(
"
\n
"
)
# print(chi_ctr_precise_recently)
# print(chi_ctr_precise_yesterday)
print
(
"============================分割线==================================="
)
#保存文件
##发送邮件
# Account used by mail() both as the SMTP login name and as the From address.
my_sender
=
'gaoyazhe@igengmei.com'
# NOTE(review): the SMTP password is hard-coded in source and is therefore
# visible in version-control history. It should be rotated and loaded from
# the environment or a secrets store instead of being committed.
my_pass
=
'VCrKTui99a7ALhiK'
# Recipient address used by mail() for the To header and the envelope recipient.
my_user1
=
'wangzhiwei@igengmei.com'
def mail():
    """Send the fixed plain-text notification e-mail.

    Builds a UTF-8 plain-text message from ``my_sender`` to ``my_user1``
    whose subject is today's date followed by a fixed title, then logs in
    to ``smtp.exmail.qq.com`` on port 465 (SMTP over SSL) with
    ``my_sender`` / ``my_pass`` and sends it.

    Returns:
        bool: True when every step completed without raising, False when
        any step raised an exception (the traceback is printed to stderr).
    """
    text = "Hi!\nHow are you?\nHere is the link you wanted:\nhttp://www.baidu.com"
    try:
        msg = MIMEText(text, 'plain', 'utf-8')
        msg['From'] = formataddr(["王志伟", my_sender])
        msg['To'] = my_user1
        msg['Subject'] = str(datetime.date.today()) + "-esmm多目标模型训练指标统计"
        # The context manager issues QUIT and closes the socket even when
        # login() or sendmail() raises, so the connection is never leaked.
        with smtplib.SMTP_SSL("smtp.exmail.qq.com", 465) as server:
            server.login(my_sender, my_pass)
            server.sendmail(my_sender, [my_user1], msg.as_string())
    except Exception:
        # Delivery stays best-effort (callers only look at the boolean), but
        # the reason for the failure is reported instead of being discarded.
        import traceback
        traceback.print_exc()
        return False
    return True
# Run the e-mail step and report on stdout whether it succeeded.
ret = mail()
print("邮件发送成功" if ret else "邮件发送失败")
#
my_sender='gaoyazhe@igengmei.com'
#
my_pass = 'VCrKTui99a7ALhiK'
#
my_user1='wangzhiwei@igengmei.com'
#
def mail():
#
ret = True
#
try:
#
text = "Hi!\nHow are you?\nHere is the link you wanted:\nhttp://www.baidu.com"
#
msg = MIMEText(text, 'plain', 'utf-8')
#
msg['From'] = formataddr(["王志伟", my_sender])
#
msg['To'] = my_user1
#
msg['Subject'] = str(datetime.date.today()) + "-esmm多目标模型训练指标统计"
#
server = smtplib.SMTP_SSL("smtp.exmail.qq.com", 465)
#
server.login(my_sender, my_pass)
#
server.sendmail(my_sender, [my_user1], msg.as_string())
#
server.quit()
#
except Exception:
#
ret=False
#
return ret
#
#
ret=mail()
#
if ret:
#
print("邮件发送成功")
#
else:
#
print("邮件发送失败")
# chi_cvr_new=
# chi_cvr_old=
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment