Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
S
serviceRec
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
郭羽
serviceRec
Commits
0cb723f3
Commit
0cb723f3
authored
3 years ago
by
郭羽
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
service model 优化
parent
d142d185
master
No related merge requests found
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
4 additions
and
2 deletions
+4
-2
featureEng2.py
spark/featureEng2.py
+4
-2
No files found.
spark/featureEng2.py
View file @
0cb723f3
...
@@ -82,6 +82,7 @@ numberToBucketUdf = F.udf(numberToBucket, StringType())
...
@@ -82,6 +82,7 @@ numberToBucketUdf = F.udf(numberToBucket, StringType())
priceToBucketUdf
=
F
.
udf
(
priceToBucket
,
StringType
())
priceToBucketUdf
=
F
.
udf
(
priceToBucket
,
StringType
())
def
addItemStaticFeatures
(
samples
,
itemDF
,
dataVocab
):
def
addItemStaticFeatures
(
samples
,
itemDF
,
dataVocab
):
# item不设置over窗口,原因:item可能一直存在,统计数据按照最新即可
print
(
"item统计特征处理..."
)
print
(
"item统计特征处理..."
)
staticFeatures
=
samples
.
groupBy
(
'item_id'
)
.
agg
(
F
.
count
(
F
.
lit
(
1
))
.
alias
(
'itemRatingCount'
),
staticFeatures
=
samples
.
groupBy
(
'item_id'
)
.
agg
(
F
.
count
(
F
.
lit
(
1
))
.
alias
(
'itemRatingCount'
),
F
.
avg
(
F
.
col
(
'rating'
))
.
alias
(
'itemRatingAvg'
),
F
.
avg
(
F
.
col
(
'rating'
))
.
alias
(
'itemRatingAvg'
),
...
@@ -118,6 +119,7 @@ def addItemStaticFeatures(samples,itemDF,dataVocab):
...
@@ -118,6 +119,7 @@ def addItemStaticFeatures(samples,itemDF,dataVocab):
print
(
"item size:"
,
staticFeatures
.
count
())
print
(
"item size:"
,
staticFeatures
.
count
())
staticFeatures
.
show
(
5
,
truncate
=
False
)
return
staticFeatures
return
staticFeatures
def
addUserStaticsFeatures
(
samples
,
dataVocab
):
def
addUserStaticsFeatures
(
samples
,
dataVocab
):
...
@@ -880,7 +882,7 @@ if __name__ == '__main__':
...
@@ -880,7 +882,7 @@ if __name__ == '__main__':
itemDF_spark
.
printSchema
()
itemDF_spark
.
printSchema
()
itemDF_spark
.
show
(
10
,
truncate
=
False
)
itemDF_spark
.
show
(
10
,
truncate
=
False
)
#
user
统计特征处理
#
item
统计特征处理
itemStaticDF
=
addItemStaticFeatures
(
ratingSamplesWithLabel
,
itemDF_spark
,
dataVocab
)
itemStaticDF
=
addItemStaticFeatures
(
ratingSamplesWithLabel
,
itemDF_spark
,
dataVocab
)
# 统计数据处理
# 统计数据处理
...
@@ -897,7 +899,7 @@ if __name__ == '__main__':
...
@@ -897,7 +899,7 @@ if __name__ == '__main__':
user_columns
=
[
c
for
c
in
samplesWithUserFeatures
.
columns
if
c
.
startswith
(
"user"
)]
user_columns
=
[
c
for
c
in
samplesWithUserFeatures
.
columns
if
c
.
startswith
(
"user"
)]
print
(
"collect feature for user:{}"
.
format
(
str
(
user_columns
)))
print
(
"collect feature for user:{}"
.
format
(
str
(
user_columns
)))
# item columns
# item columns
item_columns
=
addItemStaticFeatures
.
columns
item_columns
=
itemStaticDF
.
columns
print
(
"collect feature for item:{}"
.
format
(
str
(
item_columns
)))
print
(
"collect feature for item:{}"
.
format
(
str
(
item_columns
)))
# model columns
# model columns
print
(
"model columns to redis..."
)
print
(
"model columns to redis..."
)
...
...
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment