Commit 288613fc authored by litaolemo's avatar litaolemo

update

parent f1e00560
# -*- coding:utf-8 -*-
# @Time : 2019/7/16 16:08
# @Author : litao
# -*- coding:utf-8 -*-
# @Time : 2019/5/5 14:38
# @Author : litao
import re
def calculate_douyin_id(data_dic):
    """Derive the identifier used for a Douyin video record.

    Args:
        data_dic: Mapping describing the video; the link is read from
            its ``"url"`` key.

    Returns:
        * the part of the URL before the first ``?`` when the URL has a
          query string;
        * otherwise, when a ``/video/<id>/`` segment is present, the
          share URL ``https://www.iesdouyin.com/share/video/<id>/``;
        * otherwise the URL unchanged (``None`` or ``""`` are handed
          back as-is instead of raising).
    """
    url = data_dic.get("url")
    if not url:
        # Nothing to parse; a membership test on None would raise TypeError.
        return url

    if "?" in url:
        # Dropping the query string is all the normalisation done here.
        return url.split("?")[0]

    if "video" in url:
        match = re.search(r"/video/(.*?)/", url)
        if match:
            return "https://www.iesdouyin.com/share/video/%s/" % match.group(1)

    return url
if __name__ == '__main__':
    # calculate_douyin_id reads the link through ``.get("url")``, so each
    # sample has to be wrapped in a record dict; a bare string would raise
    # AttributeError.
    print(calculate_douyin_id({"url": "https://www.iesdouyin.com/share/vido/6688242923181591821/?mid=6688519042262665996"}))
    print(calculate_douyin_id({"url": "https://www.iesdouyin.com/share/video/6689249077596671245/?mid=6689052145968450308"}))
\ No newline at end of file
# -*- coding:utf-8 -*-
# @Time : 2019/8/27 16:24
# @Author : litao
import re
def calculate_haokan_id(data_dic):
    """Extract the video id from a Haokan / Baidu short-video URL.

    Args:
        data_dic: Mapping describing the video; the link is read from
            its ``"url"`` key.

    Returns:
        The digits following ``id=`` (this also matches ``vid=``), or the
        value after ``sv_`` inside a URL-encoded ``context`` parameter.
        When neither form can be parsed the URL itself is returned, and a
        missing or empty URL is handed back unchanged.
    """
    url = data_dic.get("url")
    if not url:
        return url

    if "id=" in url:
        match = re.search(r'id=(\d+)', url)
    elif "context=%7B%22nid%22%3A%22sv_" in url:
        match = re.search(r'context=%7B%22nid%22%3A%22sv_(.+)%22%7D', url)
    else:
        return url

    # The marker can be present without the full pattern matching (for
    # example "id=abc"); fall back to the URL rather than raising IndexError.
    return match.group(1) if match else url
if __name__ == '__main__':
    # Manual smoke check covering both supported link shapes: the
    # URL-encoded ``context`` form and the plain ``vid=`` form.
    for sample_url in (
        "https://sv.baidu.com/videoui/page/videoland?context=%7B%22nid%22%3A%22sv_5091548046938576131%22%7D",
        "https://haokan.baidu.com/v?vid=4596161678511752193",
    ):
        print(calculate_haokan_id({"url": sample_url}))
\ No newline at end of file
# -*- coding: utf-8 -*-
"""
Created on Tue Jul 3 15:12:09 2018
@author: hanye
"""
import hashlib
def calculate_kwai_video_id_by_data(kwai_video_dict):
    """Build an id for a Kwai video record from its metadata.

    The id is the MD5 hex digest of ``<title>_<releaser>_<release_time>``,
    where spaces, carriage returns, newlines and tabs are removed from the
    title first.

    Args:
        kwai_video_dict: Mapping read for ``'title'``, ``'releaser'`` and
            ``'release_time'``, with ``'url'`` as the fallback.

    Returns:
        The 32-character hex digest; if the metadata is missing or not
        usable, the record's ``'url'`` value; ``None`` when that is
        unavailable as well.
    """
    try:
        title = kwai_video_dict['title']
        title_c = title.replace(' ', '').replace('\r', '').replace('\n', '').replace('\t', '')
        releaser = kwai_video_dict['releaser']
        release_time_ts = kwai_video_dict['release_time']
        kwai_key = title_c + '_' + releaser + '_' + str(release_time_ts)
        return hashlib.md5(kwai_key.encode('utf-8')).hexdigest()
    except (KeyError, TypeError, AttributeError, UnicodeError):
        # Missing keys or non-string fields: fall through to the URL.
        # Only the errors bad records produce are caught, so that
        # KeyboardInterrupt / SystemExit are no longer swallowed.
        pass

    try:
        return kwai_video_dict['url']
    except (KeyError, TypeError):
        return None
# -*- coding: utf-8 -*-
"""
Created on Mon Aug 6 15:53:10 2018
@author: zhouyujiang
"""
import re
def calculate_kwai_video_id_by_data_by_url(data_dic):
    """Derive a Kwai video id from the path of the record's URL.

    Args:
        data_dic: Mapping describing the video; the link is read from
            its ``"url"`` key.

    Returns:
        Everything after the first ``/u/`` or ``/photo/`` segment with
        every ``/`` replaced by ``_``, or ``None`` when the URL is missing
        or has nothing after such a segment.
    """
    url = data_dic.get("url")
    if not url:
        # re would raise TypeError on None; treat it as "no id".
        return None

    match = re.search(r"/u/(.+)?|/photo/(.+)?", url)
    if match is None:
        return None

    # Only one alternative can have matched; use whichever group captured.
    for captured in match.groups():
        if captured:
            return captured.replace('/', '_')
    return None
if __name__ == '__main__':
    # The function reads the link through ``.get("url")``, so the sample
    # must be wrapped in a record dict; a bare string raises AttributeError.
    print(calculate_kwai_video_id_by_data_by_url({"url": 'https://www.kuaishou.com/u/143139353/5601747480'}))
# -*- coding: utf-8 -*-
"""
Created on Tue Dec 12 16:40:20 2017
@author: hanye
"""
import re
def calculate_newTudou_video_id(data_dic):
    """Extract the video id from a new-Tudou page URL.

    Args:
        data_dic: Mapping describing the video; the link is read from
            its ``"url"`` key.

    Returns:
        The text that follows the first ``/<single word char>/`` path
        segment, taken from the part of the URL before ``.html``.
        ``None`` when the URL is missing, is not a string, or contains no
        such segment.
    """
    url = data_dic.get("url")
    if not isinstance(url, str):
        # Explicit check instead of relying on a bare ``except`` to hide
        # the AttributeError from ``None.split``.
        return None

    before_suffix = url.split('.html')[0]
    matches = re.findall(r"/\w/(.+)?", before_suffix)
    return matches[0] if matches else None
# -*- coding: utf-8 -*-
"""
Created on Mon Nov 6 09:54:09 2017
@author: hanye
"""
import re
def calculate_toutiao_video_id(data_dic):
    """Extract the numeric video id from a Toutiao URL.

    Args:
        data_dic: Mapping describing the video; the link is read from
            its ``"url"`` key.

    Returns:
        The first run of digits that is immediately followed by ``/``
        (a trailing ``/`` is assumed when the URL does not end with one),
        or ``None`` when the URL is missing, empty, or has no such run.
    """
    toutiao_url = data_dic.get("url")
    if not toutiao_url:
        # Indexing "" raises IndexError and None raises TypeError.
        return None

    if not toutiao_url.endswith('/'):
        # Lets an id at the very end of the URL match the pattern below.
        toutiao_url = toutiao_url + '/'

    match = re.search(r'([0-9]+)/', toutiao_url)
    return match.group(1) if match else None
from write_data_into_es.func_get_releaser_id import get_releaser_id
def calculate_txxw_video_id(data_dict):
    """Build the id for a Tencent News video record.

    Args:
        data_dict: Mapping read for ``"releaserUrl"`` (passed to
            ``get_releaser_id``) and ``'video_id'``.

    Returns:
        ``<video_id>_<releaser_id>`` when ``get_releaser_id`` yields a
        truthy value, otherwise the plain ``video_id``. ``None`` if
        anything fails along the way; the offending record is printed.
    """
    try:
        releaser_id = get_releaser_id(platform="腾讯新闻", releaserUrl=data_dict["releaserUrl"])
        video_id = data_dict['video_id']
        if releaser_id:
            return video_id + "_" + releaser_id
        return video_id
    except Exception:
        # Kept deliberately broad: the failure modes of get_releaser_id are
        # not visible here. Unlike a bare ``except`` this lets
        # KeyboardInterrupt / SystemExit propagate.
        print('error in :', data_dict)
        return None
# -*- coding:utf-8 -*-
# @Time : 2019/5/5 14:38
# @Author : litao
import re
def calculate_wangyi_news_id(url):
    """Extract the item id from a NetEase (163.com) news URL.

    Args:
        url: The page link as a string.

    Returns:
        The text between ``/sub/`` (checked first) or ``/v/`` and
        ``.html``. When neither segment is present, or the ``.html``
        suffix is missing, the URL is returned unchanged; a ``None`` or
        empty URL is also handed back as-is.
    """
    if not url:
        return url

    # Ordered: a URL containing "/sub/" is never parsed with the "/v/" rule.
    rules = (
        ("/sub/", r'/sub/(.+)\.html'),
        ("/v/", r'/v/(.+)\.html'),
    )
    for marker, pattern in rules:
        if marker in url:
            match = re.search(pattern, url)
            return match.group(1) if match else url
    return url
if __name__ == '__main__':
    # Manual smoke check: one "/v/" link and one "/sub/" link.
    for sample_url in (
        "https://c.m.163.com/news/v/VA9LBOJ7S.html",
        "https://c.m.163.com/news/sub/T1539761239294.html",
    ):
        print(calculate_wangyi_news_id(sample_url))
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment