Commit a297c1a1 authored by 张彦钊's avatar 张彦钊

change test file

parent 397c99b0
......@@ -220,7 +220,7 @@ def feature_engineer():
# TODO 上线后把下面train filter 删除,因为最近一天的数据也要作为训练集
train = rdd.filter(lambda x: x[0] != validate_date).map(
train = rdd.map(
lambda x: (x[1], x[2], x[3], x[4], x[5], x[6], x[7], x[8], x[9],
x[10], x[11]))
f = time.time()
......@@ -338,6 +338,7 @@ def get_predict(date,value_map,app_list_map,leve2_map,leve3_map):
print("nearby tfrecord done")
else:
print("预测集nearby为空")
rdd.unpersist()
if __name__ == '__main__':
......@@ -358,6 +359,6 @@ if __name__ == '__main__':
local_path = "/home/gmuser/esmm/"
validate_date, value_map, app_list_map, leve2_map, leve3_map = feature_engineer()
get_predict(validate_date, value_map, app_list_map, leve2_map, leve3_map)
# get_predict(validate_date, value_map, app_list_map, leve2_map, leve3_map)
spark.stop()
import requests
import re
import time
import random
import json
import urllib3
from lxml import etree
from bs4 import BeautifulSoup
# Suppress urllib3's warnings; the HTTP calls in this file pass verify=False.
urllib3.disable_warnings()
# Android device id: substituted into the Android feed URLs and passed to the
# MySQL queue lookup from the entry point.
device_Android = 868771031984211 # 868080041007174
# device_IOS = "B6712382-345D-4B12-343C-5F266411C4CK" # my own device
# iOS device id substituted into the iOS request URLs.
device_IOS = "E417C286-40A4-42F6-BDA9-AEEBD8FEC3B6"
# Value sent as current_city_id in the request URLs.
city = "beijing" # beijing
# Shared HTTP session used by the request helpers below.
s = requests.session()
def get_d(id=42124):
    """Request the tractate (user post) list for one tractate id.

    The original URL hard-coded ``tractate_id=42124`` while still passing
    ``id`` to ``str.format``; the surplus argument was silently ignored, so
    the request never matched the id printed in the log line.  The id is now
    substituted into the URL, and defaults to 42124 so the previous request is
    still what a bare ``get_d()`` sends.

    :param id: tractate id to query (name kept for caller compatibility even
        though it shadows the builtin).
    :return: the decoded JSON body of the response.
    """
    url = (
        "https://backend.igengmei.com/api/tractate/list?t=1565668444141"
        "&version=7.12.6&hybrid=true&channel=App%20Store"
        "&current_city_id={}&device_id={}"
        "&idfa=B6712382-69D5-4B12-9810-5F266411C4CF"
        "&idfv=E6937620-F372-434B-9084-9A9580573838"
        "&lat=40.00190424107359&lng=116.4871480941339"
        "&platform=iPhone&os_version=11.4.1&json="
        "&tractate_id={}&start_num=0&from=&data_type=user_post"
    ).format(city, device_IOS, id)
    # verify=False: certificate checks are skipped, as everywhere in this script.
    response = s.get(url, verify=False)
    payload = response.json()
    print("请求id为: %s" % id)
    return payload
def mysql(device_Android):
    """Return the topic queue stored for a device as a list of id strings.

    Reads the ``queue`` column of ``device_user_topic_queue`` for the given
    device id and splits it on commas.

    :param device_Android: device id to look up (int or str).
    :return: list of id strings; an empty list when the device has no row or
        the stored queue is NULL.
    """
    # pymysql is used by this function but is not imported at the top of the
    # file, so bring it into scope here.
    import pymysql

    # NOTE(review): connection credentials are hard-coded in source; consider
    # moving them to configuration.
    db = pymysql.connect(host='172.16.30.136', port=3306, user='doris', passwd='o5gbA27hXHHm', db='doris_prod')
    try:
        cursor = db.cursor()
        # Parameterised query instead of string formatting.  The id is passed
        # as text so it is still compared as a quoted string, exactly like the
        # original "device_id = '...'" clause.
        cursor.execute(
            "select queue from device_user_topic_queue where device_id = %s",
            (str(device_Android),),
        )
        row = cursor.fetchone()
    finally:
        # Close the connection even when the query raises.
        db.close()

    # fetchone() returns None when no row matches; the column may also be NULL.
    if row is None or row[0] is None:
        queue_ids = []
    else:
        queue_ids = row[0].split(",")
    print(queue_ids)
    return queue_ids
# Percent-encoded ``offset`` cursor sent for feed pages 2-5.  Only the
# A/C/B/E/H counters and the page number differ between pages.
_OFFSET_TEMPLATE = (
    "A%3D{a}%26C%3D{c}%26B%3D{b}%26E%3D{e}%26D%3D0%26G%3D0%26F%3D0%26I%3D0"
    "%26H%3D{h}%26K%3D0%26J%3D0%26M%3D0%26L%3D0%26O%3D0%26N%3D0%26Q%3D0"
    "%26P%3D0%26S%3D0%26R%3D0%26U%3D0%26T%3D0%26W%3D0%26V%3D0%26Y%3D0"
    "%26page%3D{page}"
)

# (a, c, b, e, h, page) for pages 2-5; page 1 is requested with an empty offset.
_PAGE_COUNTERS = (
    (5, 2, 1, 1, 1, 1),
    (11, 4, 2, 2, 2, 2),
    (19, 7, 4, 3, 3, 3),
    (19, 7, 4, 3, 3, 4),
)

# Chinese numerals used in the per-page log labels (pages one to five).
_PAGE_NUMERALS = ("一", "二", "三", "四", "五")

# Android "choice" feed request; identical for every page apart from ``offset``.
_ANDROID_URL = (
    "{pre}/api/index/v7?offset={offset}&tabtype=choice&tags_id=%5B%5D&tab_id=0"
    "&face_simulator_time=&is_7770_homepage_gray=1"
    "&app_name=com.wanmeizhensuo.zhensuo&version=7.8.0&platform=android"
    "&device_id={device}&os_version=8.1.0&model=V1809T&screen=1080x2340"
    "&lat=40.00204&lng=116.487055&channel=benzhan&current_city_id={city}"
    "&manufacturer=vivo&uuid=2b15eed5-5361-4a7a-874d-c6a87d5e0a64"
    "&android_device_id=androidid_233708112de9a151"
)

# iOS "choice" feed request for the first page (its client fields differ from
# the ones used for the following pages).
_IOS_FIRST_PAGE_URL = (
    "{pre}/api/index/v7?platform=iPhone&os_version=11.4.1&version=7.9.2"
    "&model=iPhone%206s&release=1"
    "&idfa=B6712382-69D5-4B12-9810-5F266411C4CF"
    "&idfv=EEF47D5D-0B1D-46C6-AB16-3D3BFC125044"
    "&device_id={device}&channel=App%20Store&app_name=gengmeiios"
    "&current_city_id={city}&lat=40.00148597039029&lng=116.484250436819"
    "&is_WiFi=1&hardware_model=iPhone8,1&count=10&offset={offset}&tab_id=0"
    "&tabtype=choice&tags_id=%5B%5D"
)

# iOS "choice" feed request for pages 2-5.
_IOS_NEXT_PAGE_URL = (
    "{pre}/api/index/v7?platform=iPhone&os_version=12.0.1&version=7.8.0"
    "&model=iPhone%205S&release=1"
    "&idfa=00000000-0000-0000-0000-000000000000"
    "&idfv=1CF209E0-D061-4630-817C-3A7B90AAA1A9"
    "&device_id={device}&channel=App%20Store&app_name=gengmeiios"
    "&current_city_id={city}&lat=40.00198608081014&lng=116.4871573600158"
    "&is_WiFi=1&hardware_model=iPhone6,2&count=10&offset={offset}&tab_id=0"
    "&tabtype=choice&tags_id=%5B%5D"
)

# (feed key, key holding the tag's name, placeholder used when no tag is usable).
# Order matches the order in which the feed keys were originally checked.
# NOTE(review): diary tags are read via 'name' while every other kind uses
# 'tag_name' -- kept as found; confirm against the API.
_TAGGED_KINDS = (
    ('diary', 'name', "日记tag为空"),
    ('topic', 'tag_name', "帖子tag为空"),
    ('wiki', 'tag_name', "百科tag为空"),
    ('data', 'tag_name', "回答tag为空"),
    ('live', 'tag_name', "直播tag为空"),
    ('special', 'tag_name', "专题tag为空"),
)


def _choice_feed_urls(platform, pre):
    """Build the five "choice" feed page URLs for ``platform``."""
    offsets = [""] + [
        _OFFSET_TEMPLATE.format(a=a, c=c, b=b, e=e, h=h, page=page)
        for a, c, b, e, h, page in _PAGE_COUNTERS
    ]
    if platform == "android":
        return [
            _ANDROID_URL.format(pre=pre, offset=offset, device=device_Android, city=city)
            for offset in offsets
        ]
    templates = [_IOS_FIRST_PAGE_URL] + [_IOS_NEXT_PAGE_URL] * len(_PAGE_COUNTERS)
    return [
        template.format(pre=pre, offset=offset, device=device_IOS, city=city)
        for template, offset in zip(templates, offsets)
    ]


def _classify_feature(item):
    """Return ``(kind, item_id, tag_id, tag_name)`` for one feed entry."""
    for kind, name_key, placeholder in _TAGGED_KINDS:
        if kind in item:
            try:
                first_tag = item[kind]['tags'][0]
                tag_id, tag_name = first_tag['tag_id'], first_tag[name_key]
            except (KeyError, IndexError, TypeError):
                # Missing/empty tag list: record the placeholder for both
                # fields so the id and the name always stay paired.
                tag_id = tag_name = placeholder
            return kind, item['id'], tag_id, tag_name
    if 'qa' in item:
        return 'qa', item['id'], '问答', '问答'
    # Entry of an unrecognised kind.
    return '异常', '异常', '异常', '异常'


def get_chapter(id, platform="ios"):
    """Scan five pages of the "choice" home feed for a given content id.

    For every page the number of entries and the list of entry ids are
    printed, and "失败" is printed once for each entry whose id equals ``id``.

    Feed entry kinds recognised by the classifier:
        topic   - post
        wiki    - encyclopedia entry
        data    - answer
        special - special topic
        diary   - diary
        live    - live stream
        qa      - question and answer

    :param id: content id to look for (name kept for caller compatibility).
    :param platform: ``"ios"`` (default, what the script requested before) or
        ``"android"``; replaces the commented-out URL/label lists that had to
        be toggled by hand.
    :raises ValueError: if ``platform`` is neither ``"ios"`` nor ``"android"``.
    :return: None
    """
    if platform not in ("ios", "android"):
        raise ValueError("platform must be 'ios' or 'android', got %r" % (platform,))

    pre = "https://backend.igengmei.com"
    label_prefix = "Android" if platform == "android" else "IOS"
    # Ids read from the MySQL queue are strings, while ids decoded from JSON
    # may be numbers, so both sides are compared as text.
    wanted = str(id)

    # NOTE(review): the original indentation was lost; this reconstruction
    # classifies and checks every fetched page inside the page loop -- confirm.
    for numeral, url in zip(_PAGE_NUMERALS, _choice_feed_urls(platform, pre)):
        response = s.get(url, verify=False)
        features = response.json()['data']['features']
        page_label = "{} 精选第{}页".format(label_prefix, numeral)
        print("%s , 数据总数为: %s" % (page_label, len(features)))

        # The kind and tag fields are not printed; they are kept in the
        # records so the classification can be inspected while debugging.
        records = [_classify_feature(item) for item in features]
        feature_ids = [record[1] for record in records]
        print(feature_ids)

        for feature_id in feature_ids:
            if str(feature_id) == wanted:
                print("失败")
if __name__ == '__main__':
    # get_d was called without its id argument (TypeError); pass the tractate
    # id (42124) that this script's tractate request has always used.
    get_d(42124)
    # mysql() accepts only the device id; the extra `city` argument raised
    # TypeError before any query was made.
    queue_ids = mysql(device_Android)
    # Check every queued id against the home feed.
    for queue_id in queue_ids:
        get_chapter(queue_id)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment