Commit 040a0f2c authored by litaolemo's avatar litaolemo

update

parent e0b47295
# -*- coding:UTF-8 -*-
# @Time : 2020/10/26 16:10
# @File : __init__.py
# @email : litao@igengmei.com
# @author : litao
\ No newline at end of file
# -*- coding:UTF-8 -*-
# @Time : 2020/10/26 16:10
# @File : api_crawler_test.py
# @email : litao@igengmei.com
# @author : litao
import requests
from crawler.crawler_sys.utils.output_results import retry_get_url
import json, random, urllib
import requests
import json
from colorama import init, Fore, Back
# Set up colorama for terminal colour output, with its autoreset option enabled.
init(autoreset=True)
def test_search_diary(query=''):
    """Collect the diary and tractate ids returned by the content search.

    Six pages of 100 results each (offsets 0, 100, ..., 500) are requested
    through ``retry_get_url``. Every item found under ``data -> diaries`` in
    the JSON response is sorted by its ``data_type``: 19 goes to the tractate
    list, 0 goes to the diary list, and any other type is ignored.

    Args:
        query: Search keyword. It is percent-encoded before being placed in
            the URL so that characters such as ``&``, ``#`` or ``%`` cannot
            corrupt the query string.

    Returns:
        A ``(diary_id_list, tractate_id_list)`` tuple. Each list holds unique
        ids in first-seen order.

    Raises:
        Whatever ``retry_get_url`` or the JSON decoding raises, plus
        ``KeyError`` when the response lacks the expected keys.
    """
    # Imported locally so the function does not depend on ``import urllib``
    # having loaded the ``urllib.parse`` submodule as a side effect.
    from urllib.parse import quote

    url = 'https://backend.igengmei.com/api/search/v6/content?query=%s&tab_type=1&is_first=0&size=100&order_by=0&area_ref=&max_price=100000&min_price=0&ai_tab_type=0&sort_type=0&app_name=com.wanmeizhensuo.zhensuo&version=7.35.0&platform=android&device_id=869412032478155&os_version=10&model=LYA-AL00&screen=1080x2265&lat=40.005167&lng=116.477603&channel=benzhan&manufacturer=HUAWEI&uuid=1277ae88-094c-4484-9357-1b3e4519521e&android_device_id=androidid_7255c3398845cdd5&current_city_id=beijing' % quote(str(query), safe='')

    diary_id_list = []
    tractate_id_list = []
    # Companion sets give O(1) duplicate checks; the lists keep the order.
    seen_diary_ids = set()
    seen_tractate_ids = set()

    for index in range(6):
        response = retry_get_url(url, params={'offset': index * 100})
        dict_response = response.json()['data']['diaries']
        print("这是第%d次请求" % index)
        for data in dict_response:
            data_type = data['data_type']
            if data_type == 19:
                item_id = data['id']
                if item_id not in seen_tractate_ids:
                    seen_tractate_ids.add(item_id)
                    tractate_id_list.append(item_id)
            elif data_type == 0:
                item_id = data['id']
                if item_id not in seen_diary_ids:
                    seen_diary_ids.add(item_id)
                    diary_id_list.append(item_id)

    print("================================================")
    print(diary_id_list, tractate_id_list)
    return (diary_id_list, tractate_id_list)
if __name__ == '__main__':
    # Imported before crawling so a missing pandas installation fails
    # immediately instead of after every search request has been made.
    import pandas as pd

    # Search keywords to run through test_search_diary, one after another.
    query_list = ["双眼皮",
                  "瘦脸针",
                  "脱毛",
                  "小气泡",
                  "发型",
                  "水光针",
                  "热玛吉",
                  "光子嫩肤",
                  "测脸型",
                  "吸脂",
                  "玻尿酸",
                  "鼻综合",
                  "脸型适合什么发型",
                  "果酸焕肤",
                  "双眼皮2388",
                  "植发",
                  "线雕",
                  "开内眼角",
                  "祛斑",
                  "美白针",
                  "搜索项目、商品、医生",
                  "测发型",
                  "隆鼻",
                  "菲洛嘉",
                  "瘦腿针",
                  "眼综合",
                  "祛痘",
                  "除皱",
                  "超声刀",
                  "如何根据脸型测发型",
                  "皮秒",
                  "隆胸",
                  "脸型",
                  "热拉提",
                  "超皮秒",
                  "手术瘦脸",
                  "皮秒激光",
                  "微针",
                  "改善肤质",
                  "丰胸(隆胸)",
                  "厚唇改薄",
                  "点阵激光",
                  "鼻头",
                  "韩式半永久纹眉",
                  "玻尿酸注射",
                  "下颌角",
                  "牙齿矫正",
                  "面部吸脂",
                  "光子嫩肤88",
                  "韩式半永久妆",
                  "毛发移植",
                  "热玛吉五代",
                  "自体脂肪填充面部",
                  "拍照测发型",
                  "医选",
                  "颧骨内推",
                  "牙齿美白",
                  "面部轮廓",
                  "瘦脸针限时680",
                  "鼻部综合",
                  "自体脂肪填充",
                  "武汉洪山月目医疗美容",
                  "吸脂瘦脸",
                  "黄金微针",
                  "嗨体",
                  ]

    # One row per successful keyword: [query, diary ids, tractate ids].
    res_list = []
    for query in query_list:
        print(query)
        try:
            diary_id_list, tractate_id_list = test_search_diary(query=query)
        except Exception as exc:
            # Best-effort crawl: a failing keyword is reported and skipped so
            # the remaining keywords still run. Catching Exception rather
            # than using a bare except lets Ctrl-C stop the script.
            print("query %r failed: %r" % (query, exc))
            continue
        res_list.append([query, diary_id_list, tractate_id_list])

    res = pd.DataFrame(res_list)
    res.to_csv("save.csv", encoding='gb18030')
\ No newline at end of file
......@@ -75,7 +75,7 @@ spark.sql("CREATE TEMPORARY FUNCTION is_json AS 'com.gmei.hive.common.udf.UDFJso
spark.sql("CREATE TEMPORARY FUNCTION arrayMerge AS 'com.gmei.hive.common.udf.UDFArryMerge'")
task_list = []
task_days = 3
task_days = 120
for t in range(1, task_days):
day_num = 0 - t
now = (datetime.datetime.now() + datetime.timedelta(days=day_num))
......@@ -85,7 +85,7 @@ for t in range(1, task_days):
one_week_age_str = (now + datetime.timedelta(days=-7)).strftime("%Y%m%d")
sql_search_ctr = r"""
SELECT
SELECT
t1.partition_date as partition_date
,active_type as active_type
,device_os_type as device_os_type
......@@ -99,6 +99,7 @@ t1.partition_date as partition_date
,NVL(sum(two_click_pv),0) as two_click_pv--`有效二跳pv`
,NVL(sum(cpc_click_pv),0) as cpc_click_pv--`cpc卡片点击pv`
,NVL(sum(cpc_exp_pv),0) as cpc_exp_pv --`cpc卡片曝光pv`
FROM
(
SELECT partition_date
......@@ -108,16 +109,16 @@ FROM
,device_id
,CASE WHEN substr(md5(device_id),-1) in ('0','1','2','3','4','5','6','7') THEN '灰度' ELSE '非灰' END AS grey_type
FROM online.ml_device_day_active_status
WHERE partition_date>={start_day}
AND partition_date<= {partition_day}
WHERE partition_date>={yesterday_str}
AND partition_date<= {partition_date}
AND active_type IN ('1','2','4')
)t1
JOIN
(--精准曝光
SELECT cl_id,partition_date,card_id,count(1) as exp_pv,count(CASE WHEN get_json_object(exposure_card, '$.is_cpc')=1 THEN 1 END) as cpc_exp_pv
FROM online.ml_community_precise_exposure_detail
WHERE partition_date>={start_day}
AND partition_date<= {partition_day}
WHERE partition_date>={yesterday_str}
AND partition_date<= {partition_date}
AND action in ('page_precise_exposure','home_choiceness_card_exposure') --7745版本action改为page_precise_exposure
AND page_name in('welfare_home')
AND tab_name in ('精选')
......@@ -127,11 +128,11 @@ JOIN
)t2
on t1.device_id=t2.cl_id and t1.partition_date=t2.partition_date
LEFT JOIN
(--卡片点击
SELECT cl_id,partition_date,params['card_id'] as card_id,count(1) as click_pv,count(CASE WHEN params['is_cpc']=1 THEN 1 ELSE 0 END) as cpc_click_pv
(--卡片点击
SELECT cl_id,partition_date,params['card_id'] as card_id,count(1) as click_pv,count(CASE WHEN params['is_cpc']=1 THEN 1 END) as cpc_click_pv
FROM online.bl_hdfs_maidian_updates
WHERE partition_date>={start_day}
AND partition_date<= {partition_day}
WHERE partition_date>={yesterday_str}
AND partition_date<= {partition_date}
AND action='on_click_card'
AND params['tab_name']='精选'
AND params['page_name'] ='welfare_home'
......@@ -145,8 +146,8 @@ LEFT JOIN
(--商祥二跳
SELECT cl_id,partition_date,params['service_id'] as service_id,count(1) as two_click_pv
FROM online.bl_hdfs_maidian_updates
WHERE partition_date>={start_day}
AND partition_date<= {partition_day}
WHERE partition_date>={yesterday_str}
AND partition_date<= {partition_date}
AND (referrer in ('welfare_home')
or (params['referrer_link'] like '%[%' and json_split(params['referrer_link'])[size(json_split(params['referrer_link']))-1] in ('welfare_home')))
AND ((action in ('welfare_multiattribute_click_add','welfare_multiattribute_click_buy') AND page_name = 'welfare_detail')
......@@ -164,7 +165,7 @@ LEFT JOIN
UNION ALL
SELECT device_id
FROM ml.ml_d_ct_dv_devicespam_d --剔除刷量设备
WHERE partition_day={partition_day}
WHERE partition_day={partition_date}
)a
on t1.device_id=a.device_id
LEFT JOIN
......@@ -175,20 +176,20 @@ LEFT JOIN
SELECT user_id,partition_date,
if(size(device_list) > 0, device_list [ 0 ], '') AS device_id
FROM online.ml_user_updates
WHERE partition_date>={start_day}
AND partition_date<= {partition_day}
WHERE partition_date>={yesterday_str}
AND partition_date<= {partition_date}
)t1
JOIN
( --医生账号
SELECT distinct user_id
FROM online.tl_hdfs_doctor_view
WHERE partition_date = {partition_day}
WHERE partition_date = {partition_date}
--马甲账号/模特用户
UNION ALL
SELECT user_id
FROM ml.ml_c_ct_ui_user_dimen_d
WHERE partition_day = {partition_day}
WHERE partition_day = {partition_date}
AND (is_puppet = 'true' or is_classifyuser = 'true')
UNION ALL
......@@ -204,13 +205,13 @@ LEFT JOIN
SELECT user_id, v.device_id as device_id
FROM online.ml_user_history_detail
LATERAL VIEW EXPLODE(device_history_list) v AS device_id
WHERE partition_date = {partition_day}
WHERE partition_date = {partition_date}
)t1
JOIN
(
SELECT device_id
FROM online.ml_device_history_detail
WHERE partition_date = {partition_day}
WHERE partition_date = {partition_date}
AND is_login_doctor = '1'
)t2
ON t1.device_id = t2.device_id
......@@ -224,6 +225,7 @@ and (b.device_id is null or b.device_id ='')
GROUP BY t1.partition_date
,grey_type,active_type,device_os_type
order by 1
""".format(partition_day=today_str, start_day=yesterday_str)
print(sql_search_ctr)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment