Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
c89eb6de
Commit
c89eb6de
authored
Apr 26, 2019
by
张彦钊
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
修改测试文件
parent
e921ddcf
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
82 additions
and
0 deletions
+82
-0
multi.py
tensnsorflow/multi.py
+82
-0
No files found.
tensnsorflow/multi.py
View file @
c89eb6de
...
...
@@ -29,6 +29,7 @@ def multi_hot(df,column,n):
app_list_map
=
dict
(
zip
(
app_list_unique
,
list
(
range
(
n
,
number
+
n
))))
return
number
,
app_list_map
def
feature_engineer
():
db
=
pymysql
.
connect
(
host
=
'172.16.40.158'
,
port
=
4000
,
user
=
'root'
,
passwd
=
'3SYz54LS9#^9sBvC'
,
db
=
'jerry_test'
)
sql
=
"select max(stat_date) from esmm_train_data"
...
...
@@ -123,6 +124,87 @@ def feature_engineer():
spark
.
createDataFrame
(
test
)
.
write
.
csv
(
'/recommend/va'
,
mode
=
'overwrite'
,
header
=
True
)
spark
.
createDataFrame
(
train
)
.
write
.
csv
(
'/recommend/tr'
,
mode
=
'overwrite'
,
header
=
True
)
return
validate_date
,
value_map
,
app_list_map
,
leve2_map
,
leve3_map
# def get_predict(date,value_map,app_list_map,level2_map,level3_map):
#
# sql = "select e.y,e.z,e.label,e.ucity_id,feat.level2_ids,e.ccity_name," \
# "u.device_type,u.manufacturer,u.channel,c.top,e.device_id,e.cid_id,cut.time," \
# "dl.app_list,e.hospital_id,feat.level3_ids,feat.level2 " \
# "from esmm_pre_data e left join user_feature u on e.device_id = u.device_id " \
# "left join cid_type_top c on e.device_id = c.device_id " \
# "left join cid_time_cut cut on e.cid_id = cut.cid " \
# "left join device_app_list dl on e.device_id = dl.device_id " \
# "left join diary_feat feat on e.cid_id = feat.diary_id"
# df = con_sql(db, sql)
# df = df.rename(columns={0: "y", 1: "z", 2: "label", 3: "ucity_id", 4: "clevel2_id", 5: "ccity_name",
# 6: "device_type", 7: "manufacturer", 8: "channel", 9: "top",10: "device_id",
# 11: "cid_id", 12: "time",13:"app_list",14:"hospital_id",15:"level3_ids",
# 16: "level2"})
#
# db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test')
# sql = "select level2_id,treatment_method,price_min,price_max,treatment_time,maintain_time,recover_time " \
# "from train_Knowledge_network_data"
# knowledge = con_sql(db, sql)
# knowledge = knowledge.rename(columns={0: "level2", 1: "method", 2: "min", 3: "max",
# 4: "treatment_time", 5: "maintain_time", 6: "recover_time"})
# knowledge["level2"] = knowledge["level2"].astype("str")
#
# df = pd.merge(df, knowledge, on='level2', how='left')
# df = df.drop("level2", axis=1)
# df = df.drop_duplicates(["ucity_id", "clevel2_id", "ccity_name", "device_type", "manufacturer",
# "channel", "top", "time", "app_list", "hospital_id", "level3_ids"])
#
#
# df["stat_date"] = date
# print(df.head(6))
# df["app_list"] = df["app_list"].fillna("lost_na")
# df["app_list"] = df["app_list"].apply(app_list_func,args=(app_list_map,))
# df["clevel2_id"] = df["clevel2_id"].fillna("lost_na")
# df["clevel2_id"] = df["clevel2_id"].apply(app_list_func, args=(level2_map,))
# df["level3_ids"] = df["level3_ids"].fillna("lost_na")
# df["level3_ids"] = df["level3_ids"].apply(app_list_func, args=(level3_map,))
#
# # print("predict shape")
# # print(df.shape)
# df["uid"] = df["device_id"]
# df["city"] = df["ucity_id"]
# features = ["ucity_id", "ccity_name", "device_type", "manufacturer",
# "channel", "top", "time", "stat_date","hospital_id",
# "method", "min", "max", "treatment_time", "maintain_time", "recover_time"]
# for i in features:
# df[i] = df[i].astype("str")
# df[i] = df[i].fillna("lost")
# df[i] = df[i] + i
#
# native_pre = df[df["label"] == 0]
# native_pre = native_pre.drop("label", axis=1)
# nearby_pre = df[df["label"] == 1]
# nearby_pre = nearby_pre.drop("label", axis=1)
#
# for i in ["ucity_id", "ccity_name", "device_type", "manufacturer",
# "channel", "top", "time", "stat_date","hospital_id",
# "method", "min", "max", "treatment_time", "maintain_time", "recover_time"]:
# native_pre[i] = native_pre[i].map(value_map)
# # TODO 没有覆盖到的类别会处理成na,暂时用0填充,后续完善一下
# native_pre[i] = native_pre[i].fillna(0)
#
# nearby_pre[i] = nearby_pre[i].map(value_map)
# # TODO 没有覆盖到的类别会处理成na,暂时用0填充,后续完善一下
# nearby_pre[i] = nearby_pre[i].fillna(0)
#
# print("native")
# print(native_pre.shape)
#
# native_pre[["uid","city","cid_id"]].to_csv(path+"native.csv",index=False)
# write_csv(native_pre, "native",200000)
#
# print("nearby")
# print(nearby_pre.shape)
#
# nearby_pre[["uid","city","cid_id"]].to_csv(path+"nearby.csv",index=False)
# write_csv(nearby_pre, "nearby", 160000)
def
con_sql
(
db
,
sql
):
cursor
=
db
.
cursor
()
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment