1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
---插入每天数据
SELECT mas.partition_date day_id
,device_os_type
,active_type
,grey_type
,case when t1.page_name in ('post_detail','user_post_detail','doctor_post_detail') then '帖子详情页'
when t1.page_name in ('diary_detail') then '日记本详情页'
when t1.page_name in ('topic_detail') then '日记帖详情页'
when t1.page_name in ('question_answer_detail') then '问答详情页' end as page_name
,nvl(sum(t1.pv),0) as content_pv
,nvl(count(distinct t1.cl_id),0) as content_uv
,nvl(sum(t4.pv),0) as wel_exp_pv
,nvl(sum(t5.pv),0) as content_exp_pv
,nvl(sum(t2.pv),0) as wel_click_pv
,nvl(sum(t3.pv),0) as content_click_pv
,nvl(sum(t6.pv),0) as slide_wel_click_pv
,nvl(sum(t7.pv),0) as self_wel_click_pv
FROM
(
SELECT
partition_date,device_id,device_os_type
,case WHEN active_type = '4' THEN '老活跃设备'
WHEN active_type in ('1','2') then '新增设备' END as active_type
,CASE WHEN substr(convup(setencryption(device_id,'sha-1'),16,10),-1) in ('0','1','2','3','4') THEN '灰度' ELSE '非灰' END AS grey_type
FROM online.ml_device_day_active_status
where partition_date=regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND active_type in ('1','2','4')
AND first_channel_source_type not in ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang'
,'js-az1','js-az2','js-az3','js-az4','js-az5','jfq-az1','jfq-az2','jfq-az3','jfq-az4','jfq-az5','toufang1'
,'toufang2','toufang3','toufang4','toufang5','toufang6','TF-toufang1','TF-toufang2','TF-toufang3','TF-toufang4'
,'TF-toufang5','tf-toufang1','tf-toufang2','tf-toufang3','tf-toufang4','tf-toufang5','benzhan','promotion_aso100'
,'promotion_qianka','promotion_xiaoyu','promotion_dianru','promotion_malioaso','promotion_malioaso-shequ'
,'promotion_shike','promotion_julang_jl03','promotion_zuimei')
AND first_channel_source_type not like 'promotion\_jf\_%'
)mas
join
(
SELECT partition_date,cl_id,page_name,count(cl_id) as pv
FROM online.bl_hdfs_maidian_updates
WHERE partition_date=regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND page_name in ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_answer_detail')
AND action='page_view'
AND int(split(app_version,'\\.')[1]) >= 30
group by partition_date,cl_id,page_name
)t1
on mas.partition_date=t1.partition_date
and mas.device_id=t1.cl_id
left join
(--你可能喜欢美购卡片点击
SELECT partition_date,cl_id,page_name,count(1) as pv
FROM
(
SELECT partition_date,cl_id,page_name,business_id,params['card_id'],app_session_id
FROM online.bl_hdfs_maidian_updates
WHERE partition_date=regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND action='on_click_card'
and params['card_content_type']='service'
and params['in_page_pos']='bottom'
and params['tab_name'] is null--与横划区域区分
and ((params['position'] is not null and cl_type='android')
or (params['card_but_pos'] is null and cl_type='ios')) --与关联卡片区分
AND page_name in ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_answer_detail')
AND int(split(app_version,'\\.')[1]) >= 30
group by partition_date,cl_id,page_name,business_id,params['card_id'],app_session_id
)a
group by partition_date,cl_id,page_name
)t2
on t1.partition_date=t2.partition_date
and t1.cl_id=t2.cl_id
and t1.page_name=t2.page_name
left join
(--你可能喜欢内容卡片点击
SELECT partition_date,cl_id,page_name,count(1) as pv
FROM
(
SELECT partition_date,cl_id,page_name,business_id,params['card_id'],app_session_id
FROM online.bl_hdfs_maidian_updates
WHERE partition_date=regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND action='on_click_card'
and params['card_content_type']<>'service'
and params['in_page_pos']='bottom'
and params['tab_name'] is null
AND page_name in ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_answer_detail')
AND int(split(app_version,'\\.')[1]) >= 30
group by partition_date,cl_id,page_name,business_id,params['card_id'],app_session_id
)a
group by partition_date,cl_id,page_name
)t3
on t1.partition_date=t3.partition_date
and t1.cl_id=t3.cl_id
and t1.page_name=t3.page_name
left join
(--你可能喜欢美购卡片曝光
SELECT partition_date,cl_id,page_name,count(1) as pv
FROM
(
select partition_date,cl_id,page_name,business_id,card_id,app_session_id
from online.ml_community_precise_exposure_detail
where partition_date=regexp_replace(DATE_SUB(current_date,1) ,'-','')
and page_name in ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_answer_detail')
and card_content_type='service'
and action ='page_precise_exposure'
and params['tab_name']='feed_recommend'
and params['in_page_pos']='bottom'
and int(split(app_version,'\\.')[1]) >= 30
and is_exposure='1'
group by partition_date,cl_id,page_name,business_id,card_id,app_session_id
)a
group by partition_date,cl_id,page_name
)t4
on t1.partition_date=t4.partition_date
and t1.cl_id=t4.cl_id
and t1.page_name=t4.page_name
left join
(--你可能喜欢内容卡片曝光
SELECT partition_date,cl_id,page_name,count(1) as pv
FROM
(
select partition_date,cl_id,page_name,business_id,card_id,app_session_id
from online.ml_community_precise_exposure_detail
where partition_date=regexp_replace(DATE_SUB(current_date,1) ,'-','')
and page_name in ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_answer_detail')
and card_content_type in ('diary','user_post','answer')
and action ='page_precise_exposure'
and params['tab_name']='feed_recommend'
and params['in_page_pos']='bottom'
and int(split(app_version,'\\.')[1]) >= 30
and is_exposure='1'
group by partition_date,cl_id,page_name,business_id,card_id,app_session_id
)a
group by partition_date,cl_id,page_name
)t5
on t1.partition_date=t5.partition_date
and t1.cl_id=t5.cl_id
and t1.page_name=t5.page_name
left join
(--横划美购卡片点击
SELECT partition_date,cl_id,page_name,count(1) as pv
FROM
(
SELECT partition_date,cl_id,page_name,business_id,params['card_id'],app_session_id
FROM online.bl_hdfs_maidian_updates
WHERE partition_date=regexp_replace(DATE_SUB(current_date,1) ,'-','')
and page_name in ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_answer_detail')
and (( action='on_click_button' and params['button_name'] = 'more_recommendations')
or (action ='on_click_card' and params['in_page_pos']='recommand' and params['card_content_type']='service')
or (action ='on_click_card' and params['tab_name']='hot_recommendation' and params['card_content_type']='service'))
and int(split(app_version,'\\.')[1]) >= 30
group by partition_date,cl_id,page_name,business_id,params['card_id'],app_session_id
)a
group by partition_date,cl_id,page_name
)t6
on t1.partition_date=t6.partition_date
and t1.cl_id=t6.cl_id
and t1.page_name=t6.page_name
left join
(--关联卡片
SELECT partition_date,cl_id,page_name,count(1) as pv
FROM
(
SELECT partition_date,cl_id,page_name,business_id,params['card_id'],app_session_id
FROM online.bl_hdfs_maidian_updates
WHERE partition_date=regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND (get_json_object(params['extra_param'], '$.type')='交互栏'
or get_json_object(params['extra_param'], '$.jump_from')='msg_link'
or params['in_page_pos']='top'
or (params['in_page_pos']='bottom' and params['position'] is null and cl_type='android')
or (params['in_page_pos']='bottom' and params['card_but_pos'] is not null and cl_type='ios'))
AND action='on_click_card'
and params['card_content_type']='service'
AND page_name in ('diary_detail','topic_detail')
AND params['position'] is null
and int(split(app_version,'\\.')[1]) >= 30
group by partition_date,cl_id,page_name,business_id,params['card_id'],app_session_id
)a
group by partition_date,cl_id,page_name
)t7
on t1.partition_date=t7.partition_date
and t1.cl_id=t7.cl_id
and t1.page_name=t7.page_name
LEFT JOIN
(
select distinct device_id
from ml.ml_d_ct_dv_devicespam_d --去除机构刷单设备,即作弊设备(浏览和曝光事件去除)
WHERE partition_day=regexp_replace(DATE_SUB(current_date,1) ,'-','')
union all
select distinct device_id
from dim.dim_device_user_staff --去除内网用户
)spam_pv
on spam_pv.device_id=mas.device_id
LEFT JOIN
(
SELECT partition_date,device_id
FROM
(--找出user_id当天活跃的第一个设备id
SELECT user_id,partition_date,
if(size(device_list) > 0, device_list [ 0 ], '') AS device_id
FROM online.ml_user_updates
WHERE partition_date=regexp_replace(DATE_SUB(current_date,1) ,'-','')
)t1
JOIN
( --医生账号
SELECT distinct user_id
FROM online.tl_hdfs_doctor_view
WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
--马甲账号/模特用户
UNION ALL
SELECT user_id
FROM ml.ml_c_ct_ui_user_dimen_d
WHERE partition_day = regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND (is_puppet = 'true' or is_classifyuser = 'true')
UNION ALL
--公司内网覆盖用户
select distinct user_id
from dim.dim_device_user_staff
UNION ALL
--登陆过医生设备
SELECT distinct t1.user_id
FROM
(
SELECT user_id, v.device_id as device_id
FROM online.ml_user_history_detail
LATERAL VIEW EXPLODE(device_history_list) v AS device_id
WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
) t1
JOIN
(
SELECT device_id
FROM online.ml_device_history_detail
WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND is_login_doctor = '1'
) t2
ON t1.device_id = t2.device_id
)t2
on t1.user_id=t2.user_id
group by partition_date,device_id
)dev
on mas.partition_date=dev.partition_date and mas.device_id=dev.device_id
WHERE spam_pv.device_id IS NULL
and dev.device_id is null
group by mas.partition_date
,device_os_type,active_type,grey_type
,case when t1.page_name in ('post_detail','user_post_detail','doctor_post_detail') then '帖子详情页'
when t1.page_name in ('diary_detail') then '日记本详情页'
when t1.page_name in ('topic_detail') then '日记帖详情页'
when t1.page_name in ('question_answer_detail') then '问答详情页' end
order by 1,2,3,4,5;
--引用数据
SELECT
day_id as `日期`,
device_os_type as `设备类型`,
active_type as `活跃类型`,
grey_type as `灰度`,
page_name as `页面`,
NVL(CONCAT(ROUND(wel_click_pv/wel_exp_pv*100,2),'%'),0) as `内容页你可能喜欢美购卡片点击PV/该类卡片曝光PV`,
NVL(CONCAT(ROUND(content_click_pv/content_exp_pv*100,2),'%'),0) as `内容页你可能喜欢内容卡片点击PV/该类卡片曝光PV`,
content_pv as `内容详情页pv`,
content_uv as `内容详情页uv`,
wel_exp_pv as `你可能喜欢美购卡片曝光pv`,
content_exp_pv as `你可能喜欢内容卡片曝光pv`,
wel_click_pv as `你可能喜欢美购卡片点击pv`,
content_click_pv as `你可能喜欢内容卡片点击pv`,
slide_wel_click_pv as `横划卡片点击pv`,
self_wel_click_pv as `关联卡片点击pv`
FROM pm.tl_pm_contentpage_ctr
WHERE partition_day>='20200730' and partition_day<=regexp_replace(DATE_SUB(current_date,1) ,'-','')
order by `日期` desc,`设备类型`,`活跃类型`,`灰度`,`页面`
sparkConf.set("prod.jerry.jdbcuri", "jdbc:mysql://172.16.40.158:4000/jerry_prod?user=st_user&password=aqpuBLYzEV7tML5RPsN1pntUzFy&rewriteBatchedStatements=true")
CREATE TABLE conent_detail_page_grayscale_ctr(
day_id varchar(100),
device_os_type varchar(100),
active_type varchar(100),
grey_type varchar(100),
page_name varchar(100),
content_pv BIGINT(20),
content_uv BIGINT(20),
wel_exp_pv BIGINT(20),
content_exp_pv BIGINT(20),
wel_click_pv BIGINT(20),
content_click_pv BIGINT(20),
slide_wel_click_pv BIGINT(20),
self_wel_click_pv BIGINT(20),
partition_day varchar(100),
neirong_ctr FLOAT,
meigou_ctr FLOAT,
grey_neirong_ctr FLOAT,
grey_meigou_ctr FLOAT,
pid varchar(100),
PRIMARY KEY ( pid )
)ENGINE=InnoDB DEFAULT CHARSET=utf8;
alert
select * from conent_detail_page_grayscale_ctr as s1 left join conent_detail_page_grayscale_ctr as s2 on s1.device_os_type = s2.device_os_type and s1.active_type = s2.active_type and page_name