Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
14dacc27
Commit
14dacc27
authored
Dec 25, 2018
by
高雅喆
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'master' of git.wanmeizhensuo.com:ML/ffm-baseline
add device search data
parents
d17ffd4a
1e191aec
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
57 additions
and
8 deletions
+57
-8
applist.py
tensnsorflow/applist.py
+57
-8
No files found.
tensnsorflow/applist.py
View file @
14dacc27
import
pandas
as
pd
import
pandas
as
pd
import
pymysql
import
pymysql
import
json
import
json
from
sqlalchemy
import
create_engine
def
con_sql
(
db
,
sql
):
def
con_sql
(
db
,
sql
):
cursor
=
db
.
cursor
()
cursor
=
db
.
cursor
()
...
@@ -15,20 +16,68 @@ def con_sql(db,sql):
...
@@ -15,20 +16,68 @@ def con_sql(db,sql):
db
.
close
()
db
.
close
()
return
df
return
df
def is_json(myjson):
    """Return True if *myjson* can be parsed as JSON, otherwise False.

    Malformed text makes ``json.loads`` raise ``ValueError``
    (``json.JSONDecodeError`` is a subclass). Input that is not
    str/bytes/bytearray (e.g. ``None`` or a float) raises ``TypeError``
    instead; both are treated as "not JSON" so the function can be used
    safely as a filter predicate over arbitrary column values.
    """
    try:
        json.loads(myjson)
    except (ValueError, TypeError):
        return False
    return True
def json_format(x):
    """Parse a JSON app list and return the set of its "appName" values.

    Args:
        x: JSON text. The expected shape is a list of objects that each
            carry an "appName" key.

    Returns:
        A set with the "appName" value of every well-formed entry. JSON
        that is valid but not a list yields an empty set, and list entries
        that are not objects or lack "appName" are skipped, so one odd row
        does not abort a whole ``Series.apply``.

    Raises:
        ValueError: if *x* is not valid JSON (propagated from json.loads).
    """
    apps = json.loads(x)
    if not isinstance(apps, list):
        return set()
    return {
        app["appName"]
        for app in apps
        if isinstance(app, dict) and "appName" in app
    }
def sort_app():
    """Build per-device app-category flags and store them in MySQL.

    Steps:
      1. Read (device_id, app_list) rows from ``device_id_applist`` in the
         ``jerry_prod`` database and drop rows containing nulls.
      2. Keep only rows whose ``app_list`` parses as JSON and convert each
         one to a set of app names.
      3. For every category below, add a 0/1 column that is 1 when the
         device has at least one app from that category installed.
      4. Drop the raw ``app_list`` column and replace the table
         ``jerry_test.app_list_sort`` with the result.

    The function returns nothing; its effects are the table write and the
    progress output printed for each category.
    """
    # NOTE(review): database credentials are hard-coded here and again in
    # the SQLAlchemy URL below; they should come from configuration or the
    # environment rather than from source control.
    db = pymysql.connect(host='10.66.157.22', port=4000, user='root',
                         passwd='3SYz54LS9#^9sBvC', db='jerry_prod')
    sql = "select device_id,app_list from device_id_applist"
    # con_sql appears to close ``db`` itself and to return a DataFrame with
    # positional column labels 0 and 1 (hence the rename) - confirm against
    # con_sql's full body.
    df = con_sql(db, sql).dropna()
    df = df.rename(columns={0: "device_id", 1: "app_list"})

    # .copy() makes the filtered frame independent of the original, so the
    # column assignments below do not trigger SettingWithCopyWarning.
    df = df.loc[df["app_list"].apply(is_json)].copy()

    category = {
        "competitor": {"新氧美容"},
        "dianshang": {"京东", "淘宝", "唯品会", "天猫", "苏宁易购", "国美",
                      "当当", "亚马逊", "网易严选", "小米有品"},
        "kuajing_dianshang": {"小红书", "网易考拉", "洋码头", "达令全球好货",
                              "海狐海淘", "HIG0", "豌豆公主", "尚品网",
                              "丰趣海淘", "比呀比海外购"},
        "zhibo": {"YY直播", "映客直播", "花椒直播", "NOW直播", "小米直播",
                  "一直播", "KK直播", "酷狗直播", "来疯直播", "喵播"},
        "youxizhibo": {"虎牙直播", "斗鱼直播", "熊猫直播", "触手直播",
                       "企鹅电竞", "龙珠直播", "战旗直播", "全民直播",
                       "CC直播", "火猫直播"},
        "short_video": {"抖音短视频", "快手", "西瓜视频", "火山小视频", "秒拍",
                        "快视频", "小影", "蛙趣视频", "最右", "小咖秀"},
        "meitu": {"美图秀秀", "美颜相机", "天天P图", "Faceu激萌", "B612咔叽",
                  "in", "相机360", "潮自拍", "玩图", "nice"},
        "tiyu": {"直播吧", "腾讯体育", "新浪体育", "虎扑体育", "懂球帝",
                 "CCTV5", "疯狂体育", "球探体育比分", "PP体育", "A8体育直播"},
        "read": {"掌阅", "QQ阅读", "咪咕阅读", "书旗小说", "多看阅读",
                 "追书神器", "搜狗阅读", "微信读书", "起点小说", "宜搜小说"},
        "finance": {"21财经", "华尔街见闻", "新浪财经", "时代财经", "和讯财经",
                    "第一财经", "FT中文网", "财经杂志", "财新", "央视财经"},
        "fashion_clothes": {"蘑菇街", "聚美优品", "美丽说", "楚楚街",
                            "穿衣助手", "有货", "优品惠", "优购时尚商城",
                            "走秀奢侈品"},
        "muying": {"贝贝网", "蜜芽", "孩子王", "妈妈100", "大V店", "宝贝格子",
                   "乐友", "母婴之家", "国际妈咪海淘母婴商城", "美囤妈妈",
                   "妈妈网孕育", "宝宝树孕育", "辣妈帮", "亲宝宝", "宝宝知道",
                   "妈妈社区", "妈妈帮", "柚宝宝", "育儿宝"},
        "fresh": {"每日优鲜", "京东到家", "天天果园", "中粮我买网", "本来生活",
                  "手机惠农", "盒马", "顺丰优选", "百果园", "易果生鲜"},
        "bijia": {"美团", "拼多多", "折800", "返利网", "卷皮折扣", "淘粉吧",
                  "聚划算", "一淘", "网购联盟", "返利淘联盟", "什么值得买",
                  "花生日记"},
        "travel": {"携程旅行", "去哪儿旅行", "同程旅游", "途牛旅游", "飞猪",
                   "马蜂窝旅游", "艺龙旅行", "驴妈妈旅游", "TripAdvisor猫途鹰",
                   "美团旅行"},
        "airplane": {"航班管家", "飞常准", "航旅纵横", "春秋航空", "南方航空",
                     "中国国航", "东方航空", "海南航空", "深圳航空",
                     "四川航空"},
        "love": {"百合婚恋", "世纪佳缘", "珍爱网", "牵手婚恋", "探探", "热恋",
                 "有缘网", "约会吧", "约爱", "快约爱"},
        "stock": {"同花顺炒股票", "大智慧", "涨乐财富通", "腾讯自选股",
                  "广发证券易淘金", "金太阳", "国泰君安君弘", "海通e海通财",
                  "平安证券", "同花顺"},
        "car": {"平安好车主", "途虎养车", "车主无忧", "汽车超人", "车e族",
                "汽修宝", "车点点", "汽车大师", "乐车邦", "车享家"},
        "child": {"小伴龙", "儿歌多多", "宝宝巴士奇妙屋", "智慧树", "贝瓦儿歌",
                  "儿歌点点", "宝贝听听", "宝宝小厨房", "宝宝游乐园",
                  "叽里呱啦"},
        "homework": {"作业帮", "小猿搜题", "一起作业学生端", "学霸君",
                     "互动作业", "猿题库", "纳米盒", "阿凡题", "洋葱数学"},
        "work": {"钉钉", "企业微信", "移动彩云", "云之家", "今目标", "口袋助理",
                 "推事本", "奇鱼微办公", "工作圈", "明道"},
        "home": {"最美装修", "齐家网", "土巴兔装修", "装修头条", "装修管家",
                 "窝牛装修", "丽芙家居", "酷家乐装修", "惠装装修",
                 "房天下装修"},
        # The original literal listed "智联招聘" twice; a set keeps one copy
        # either way, so the duplicate is dropped here.
        "job": {"智联招聘", "前程无忧", "斗米", "拉勾", "Boss直聘", "猎聘同道"},
    }

    # Replace the raw JSON text with the set of installed app names.
    df["app_list"] = df["app_list"].apply(json_format)

    for name, apps in category.items():
        # 1 when the device shares at least one app with this category.
        # ``apps`` is bound as a default so the lambda never depends on the
        # loop variable's later values.
        df[name] = df["app_list"].apply(
            lambda installed, apps=apps: 1 if installed & apps else 0
        )
        print(name)
        print(df[name].unique())

    # Only device_id and the 0/1 category columns are persisted.
    df = df.drop("app_list", axis=1)

    yconnect = create_engine(
        'mysql+pymysql://root:3SYz54LS9#^9sBvC@10.66.157.22:4000/jerry_test?charset=utf8'
    )
    # if_exists='replace' drops and recreates the table on every run.
    df.to_sql("app_list_sort", yconnect, schema='jerry_test',
              if_exists='replace', index=False)
if __name__ == "__main__":
    # Run the pipeline only when executed as a script, not on import.
    sort_app()
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment