Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
S
serviceRec
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
郭羽
serviceRec
Commits
c39b62ac
Commit
c39b62ac
authored
3 years ago
by
宋柯
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
模型调试
parent
3c2d3779
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
6 additions
and
11 deletions
+6
-11
featureEngSk.py
spark/featureEngSk.py
+6
-11
No files found.
spark/featureEngSk.py
View file @
c39b62ac
...
...
@@ -508,10 +508,10 @@ def getEsConn():
def
getClickSql
(
start
,
end
):
sql
=
"""
SELECT
DISTINCT t1.partition_date, t1.cl_id device_id, t1.card_id,t1.time_stamp,t1.cl_type as os,
t1.city_id as user_city_id
SELECT
t1.partition_date, t1.cl_id device_id, t1.card_id, t1.cl_type as os,
t1.city_id as user_city_id
FROM
(
select partition_date,city_id,cl_id,business_id as card_id,
time_stamp,
page_stay,cl_type
select partition_date,city_id,cl_id,business_id as card_id,page_stay,cl_type
from online.bl_hdfs_maidian_updates
where action = 'page_view'
AND partition_date>='{startDay}' and partition_date<='{endDay}'
...
...
@@ -521,7 +521,7 @@ def getClickSql(start, end):
AND cl_id != ''
AND business_id is not null
AND business_id != ''
group by partition_date,city_id,cl_id,business_id,
time_stamp,
page_stay,cl_type
group by partition_date,city_id,cl_id,business_id,page_stay,cl_type
) AS t1
join
( --渠道,新老
...
...
@@ -539,7 +539,6 @@ def getClickSql(start, end):
AND first_channel_source_type not like 'promotion
\
_jf
\
_
%
'
) t2
on t1.cl_id = t2.device_id
LEFT JOIN
( --去除黑名单
select distinct device_id
...
...
@@ -556,10 +555,10 @@ def getClickSql(start, end):
def
getExposureSql
(
start
,
end
):
# t1.partition_date, t1.cl_id device_id, t1.card_id, t1.time_stamp, t1.cl_type as os, t1.city_id as user_city_id
sql
=
"""
SELECT
DISTINCT t1.partition_date,t1.cl_id device_id,t1.card_id,t1.time_stamp,cl_type as os,
t1.city_id as user_city_id
SELECT
t1.partition_date, t1.cl_id device_id, t1.card_id, cl_type as os,
t1.city_id as user_city_id
from
( --新首页卡片曝光
SELECT partition_date,city_id,cl_type,cl_id,card_id
,max(time_stamp) as time_stamp
SELECT partition_date,city_id,cl_type,cl_id,card_id
FROM online.ml_community_precise_exposure_detail
where partition_date>='{startDay}' and partition_date<='{endDay}'
and action in ('page_precise_exposure','home_choiceness_card_exposure')
...
...
@@ -574,7 +573,6 @@ def getExposureSql(start, end):
and card_content_type in ('service')
and (get_json_object(exposure_card,'$.in_page_pos') is null or get_json_object(exposure_card,'$.in_page_pos') != 'seckill')
group by partition_date,city_id,cl_type,cl_id,card_id,app_session_id
) t1
join
( --渠道,新老
...
...
@@ -592,7 +590,6 @@ def getExposureSql(start, end):
AND first_channel_source_type not like 'promotion
\
_jf
\
_
%
'
) t2
on t1.cl_id = t2.device_id
LEFT JOIN
( --去除黑名单
select distinct device_id
...
...
@@ -913,9 +910,7 @@ def get_click_exp_rating_df(trainDays, spark):
clickDF
=
clickDF
.
withColumn
(
"label"
,
F
.
lit
(
1
))
expDF
=
expDF
.
withColumn
(
"label"
,
F
.
lit
(
0
))
ratingDF
=
clickDF
.
union
(
expDF
)
ratingDF
=
ratingDF
.
withColumnRenamed
(
"time_stamp"
,
"timestamp"
)
\
.
withColumn
(
"user_city_id"
,
F
.
when
(
F
.
col
(
"user_city_id"
)
.
isNull
(),
"-1"
)
.
otherwise
(
F
.
col
(
"user_city_id"
)))
\
.
withColumn
(
"timestamp"
,
F
.
col
(
"timestamp"
)
.
cast
(
"long"
))
ratingDF
=
ratingDF
.
withColumn
(
"user_city_id"
,
F
.
when
(
F
.
col
(
"user_city_id"
)
.
isNull
(),
"-1"
)
.
otherwise
(
F
.
col
(
"user_city_id"
)))
ratingDF
.
cache
()
print
(
"ratingDF.columns: {}"
.
format
(
ratingDF
.
columns
))
...
...
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment