Commit 709a4df1 authored by haowang's avatar haowang

add upload file

parent 0ba5fda3
from __future__ import absolute_import, unicode_literals
import hashlib
import datetime
import time
import six
from collections import OrderedDict
from qiniu import Auth, PersistentFop, urlsafe_base64_encode
from qiniu import put_data, put_file
from qiniu import BucketManager, build_batch_copy
import json
from urllib import parse
# Qiniu access/secret key pair; QiniuTool reads both to build its Auth object.
# NOTE(review): these credentials are committed in plain text -- consider
# loading them from the environment or a secrets store, and rotating the pair.
QINIU_ACCESS_KEY = "UPCOYIJkZOMcdd9FDzpBqYjzWUh55fBpVi3AhWpL"
QINIU_SECRET_KEY = "z5YvpDDSam_JE345Z8J_f3TufzelOW2VOGNoBl9e"
# Dummy value for Enum as EnumMeta explicitly checks for it, but of course
# until EnumMeta finishes running the first time the Enum class doesn't exist.
Enum = None
def _is_descriptor(obj):
"""Returns True if obj is a descriptor, False otherwise."""
return (
hasattr(obj, '__get__') or
hasattr(obj, '__set__') or
hasattr(obj, '__delete__'))
def _is_dunder(name):
"""Returns True if a __dunder__ name, False otherwise."""
return (name[:2] == name[-2:] == '__' and
name[2:3] != '_' and
name[-3:-2] != '_' and
len(name) > 4)
def _is_sunder(name):
"""Returns True if a _sunder_ name, False otherwise."""
return (name[0] == name[-1] == '_' and
name[1:2] != '_' and
name[-2:-1] != '_' and
len(name) > 2)
class _EnumDict(dict):
    """Class-body namespace that records enum member names as they are set.

    Every assignment that is neither a dunder nor a descriptor is validated
    and its key appended to ``self._member_names``; EnumMeta later uses that
    list as the enumeration member names.
    """

    def __init__(self, allow_types):
        super(_EnumDict, self).__init__()
        self._allow_types = allow_types
        self._member_names = []

    @classmethod
    def hack4py2(cls, classdict, allow_types):
        """Build an _EnumDict from a plain class dict (Python 2 code path).

        Each item is re-inserted through ``__setitem__`` so the same checks
        and member-name bookkeeping apply.
        """
        assert isinstance(classdict, dict)
        enum_dict = cls(allow_types=allow_types)
        for name, item in six.iteritems(classdict):
            enum_dict[name] = item
        return enum_dict

    def _check_value_type(self, v):
        """Validate a member definition, raising TypeError when it is malformed.

        Accepted shapes are a bare value whose type is in
        ``self._allow_types``, or a 2-tuple ``(value, description)`` where
        the description is a string. The restriction keeps enum values
        JSON-serializable; the enumeration itself only cares about names.
        """
        def _check_basetype(candidate):
            if isinstance(candidate, self._allow_types):
                return
            allowed = ', '.join(map(str, self._allow_types))
            raise TypeError('Value type must be one of [%s], '
                            'instead of %s' % (allowed, type(candidate)))

        # Only exact tuples are treated as (value, description) pairs.
        if type(v) is not tuple:
            _check_basetype(v)
            return
        if len(v) != 2:
            raise TypeError('Tuple enum definition must be length of 2')
        _check_basetype(v[0])
        if not isinstance(v[1], six.string_types):
            raise TypeError('The second element of tuple enum definition '
                            'must be string, i.e. an explanation of the enum value')

    def __setitem__(self, key, value):
        """Store `key`, registering it as an enum member when appropriate.

        Sunder names are rejected, dunder names are stored untouched, a
        member name may not be reused, and a non-descriptor value may not
        overwrite an existing key. Duplicate *values* are not checked here.
        """
        if _is_sunder(key):
            raise ValueError('_names_ are reserved for future Enum use')
        if not _is_dunder(key):
            if key in self._member_names:
                # descriptor overwriting an enum?
                raise TypeError('Attempted to reuse key: %r' % key)
            if not _is_descriptor(value):
                if key in self:
                    # enum overwriting a descriptor?
                    raise TypeError('Key already defined as: %r' % self[key])
                # value must be BASE_TYPE or a tuple (BASE_TYPE, string),
                # where BASE_TYPE is a string or integer type
                self._check_value_type(value)
                self._member_names.append(key)
        super(_EnumDict, self).__setitem__(key, value)
class EnumMeta(type):
    """Metaclass for Enum.

    Collects the member definitions recorded by _EnumDict, creates one
    instance of the new class per definition, and exposes class-level lookup,
    iteration and membership tests over those members.
    """

    @staticmethod
    def _find_allow_types_(cls, bases):
        """Return the tuple of value types allowed for the class being built.

        :param cls: name of the class under construction
        :param bases: base classes of the class under construction
        :return: all integer/text types, narrowed to the non-Enum bases when
            any such base is present
        """
        all_types = set(six.integer_types) | {six.text_type, str}
        allow_types = set()
        if Enum is None:  # Enum base class
            assert cls == 'Enum'
            return tuple(all_types)
        else:
            # Any base that is not an Enum subclass restricts the value types.
            for base in bases:
                if not issubclass(base, Enum):
                    allow_types.add(base)
            if allow_types:
                return tuple(all_types & allow_types)
            else:
                return tuple(all_types)

    @classmethod
    def __prepare__(mcs, cls, bases):
        """Supply an _EnumDict as the namespace for the class body."""
        return _EnumDict(EnumMeta._find_allow_types_(cls, bases))

    def __new__(mcs, cls, bases, _dct):
        """Create the enum class and instantiate its members.

        :param cls: name of the new class
        :param bases: base classes
        :param _dct: class namespace (an _EnumDict, or a plain dict on Python 2)
        :raises ValueError: if a member is named ``mro``
        """
        # hacking to generate an _EnumDict Object
        if six.PY2:
            dct = _EnumDict.hack4py2(_dct, EnumMeta._find_allow_types_(cls, bases))
        else:
            dct = _dct
        # save enum items into separate mapping so they don't get baked into
        # the new class
        # Temporary workaround: on Python 2 using OrderedDict here has no
        # effect and would change the enum ordering that some existing
        # business code relies on.
        if six.PY2:
            members = {k: dct[k] for k in dct._member_names}
        else:
            members = OrderedDict([(k, dct[k]) for k in dct._member_names])
        for name in dct._member_names:
            del dct[name]
        # check for illegal enum names (any others?)
        invalid_names = set(members) & {'mro', }
        if invalid_names:
            raise ValueError('Invalid enum member name: {0}'.format(
                ','.join(invalid_names)))
        # create our new Enum type
        enum_class = super(EnumMeta, mcs).__new__(mcs, cls, bases, dct)
        enum_class._member_names_ = []  # names in definition order
        enum_class._member_map_ = OrderedDict()  # name->value map
        # Reverse value->name map for hashable values.
        enum_class._value2member_map_ = {}
        # [value, description] pairs, one per definition (aliases included).
        enum_class.choices = []
        # instantiate them, checking for duplicates as we go
        # we instantiate first instead of checking for duplicates first in case
        # a custom __new__ is doing something funky with the values -- such as
        # auto-numbering ;)
        for member_name, value in six.iteritems(members):
            # A tuple definition is (value, description); a bare value has
            # an empty description.
            if isinstance(value, tuple):
                real_value = value[0]
                desc = value[1]
            else:
                real_value = value
                desc = ''
            enum_member = enum_class()
            enum_member._name_ = member_name
            enum_member._value_ = real_value
            enum_member._desc_ = desc
            # If another member with the same value was already defined, the
            # new member becomes an alias to the existing one.
            for name, canonical_member in six.iteritems(enum_class._member_map_):
                if canonical_member._value_ == enum_member._value_:
                    enum_member = canonical_member
                    break
            else:
                # Aliases don't appear in member names (only in __members__).
                enum_class._member_names_.append(member_name)
            # now add to _member_map_
            enum_class._member_map_[member_name] = enum_member
            enum_class._value2member_map_[real_value] = enum_member
            enum_class.choices.append([real_value, desc])
        return enum_class

    def __contains__(cls, value):
        """Return True if `value` is the value of some member."""
        return value in cls._value2member_map_

    def __delattr__(cls, attr):
        """Delete a class attribute, refusing to delete enum members."""
        # nicer error message when someone tries to delete an attribute
        # (see issue19025).
        if attr in cls._member_map_:
            raise AttributeError(
                "%s: cannot delete Enum member." % cls.__name__)
        super(EnumMeta, cls).__delattr__(attr)

    def __getattr__(cls, name):
        """Return the value of the enum member matching `name`

        Note that the member's ``_value_`` is returned, not the member object.

        We use __getattr__ instead of descriptors or inserting into the enum
        class' __dict__ in order to support `name` and `value` being both
        properties for enum members (which live in the class' __dict__) and
        enum members themselves.

        :raises AttributeError: for dunder names and unknown member names
        """
        # check if classmethod(bound now)
        if _is_dunder(name):
            raise AttributeError(name)
        try:
            enum_member = cls._member_map_[name]
            return enum_member._value_
        except KeyError:
            six.raise_from(AttributeError(name), None)

    def __desc__(cls, value):
        """Return the description of the member whose value is `value`.

        :raises KeyError: if no member has that value
        """
        return cls._value2member_map_[value]._desc_

    @property
    def __members__(cls):
        """Returns a mapping of member name->member.

        This mapping lists all enum members, including aliases. A copy of the
        internal mapping is returned, so changes to it do not affect the enum.
        """
        return cls._member_map_.copy()

    def __getitem__(cls, name):
        """Return the member object registered under `name`."""
        return cls._member_map_[name]

    def __iter__(cls):
        """Returns a generator of tuples(member.value, member.desc) for each
        non-alias member, in the order of ``_member_names_``
        """
        return ((
            cls._member_map_[name]._value_,
            cls._member_map_[name]._desc_) for name in cls._member_names_)

    def __len__(cls):
        """Return the number of non-alias members."""
        return len(cls._member_names_)

    def __repr__(cls):
        return "<enum %r>" % cls.__name__

    def __reversed__(cls):
        """Return a generator of member objects in reverse name order.

        Unlike ``__iter__``, this yields the member objects themselves.
        """
        return (cls._member_map_[name] for name in reversed(cls._member_names_))

    def __setattr__(cls, name, value):
        """Block attempts to reassign Enum members.

        A simple assignment to the class namespace only changes one of the
        several possible ways to get an Enum member from the Enum class,
        resulting in an inconsistent Enumeration.
        """
        member_map = cls.__dict__.get('_member_map_', {})
        if name in member_map:
            raise AttributeError('Cannot reassign members.')
        super(EnumMeta, cls).__setattr__(name, value)

    def __dir__(self):
        """Return the default attribute listing plus the member names."""
        return list(super(EnumMeta, self).__dir__()) + self._member_names_
class Enum(six.with_metaclass(EnumMeta, object)):
    """Base class for enumerations; subclass it to declare members.

    Each member instance carries ``_name_``, ``_value_`` and ``_desc_``,
    which EnumMeta assigns when the class is created.
    """

    def __repr__(self):
        owner = self.__class__.__name__
        return "<%s.%s: %r>" % (owner, self._name_, self._value_)

    def __str__(self):
        # "Class.NAME", followed by "(description)" when one is set.
        label = "%s.%s" % (self.__class__.__name__, self._name_)
        if not self._desc_:
            return label
        return "%s(%s)" % (label, self._desc_)

    def __hash__(self):
        return hash(self._name_)

    @classmethod
    def getDesc(cls, key, defaultValue=None):
        """Return the description for value `key`, or `defaultValue` if unknown.

        Backport function for gaia.rpc.tool.enumeration.Enumeration
        """
        try:
            description = cls.__desc__(key)
        except KeyError:
            return defaultValue
        return description
def unique(enumeration):
    """Class decorator that rejects enumerations containing aliases.

    A name in ``__members__`` whose member carries a different ``_name_`` is
    an alias, i.e. a second name for an already-used value.

    :raises ValueError: listing every ``alias -> canonical`` pair found
    :return: the enumeration itself when all values are unique
    """
    aliases = [
        (alias, member._name_)
        for alias, member in enumeration.__members__.items()
        if alias != member._name_
    ]
    if not aliases:
        return enumeration
    alias_details = ', '.join(
        "%s -> %s" % (alias, canonical) for alias, canonical in aliases)
    raise ValueError('duplicate values found in %r: %s' %
                     (enumeration, alias_details))
# Image/file categories accepted by the upload helpers.
# Each member is declared as (value, description); attribute access such as
# IMG_TYPE.DIARY yields the integer value (see EnumMeta.__getattr__), and
# @unique rejects any two members that share a value.
@unique
class IMG_TYPE(Enum):
    BANNER = (1, 'Banner')
    BODYPART = (2, 'BodyPart')
    DIARY = (3, '日记本')
    CAMPAIGNIMAGELINK = (5, '活动图片链接')
    CONSULTWIKI = (6, '咨询百科')
    DOCTOR = (7, '医生')
    FEEDBACKCATEGORY = (8, '反馈类型')
    FEEDBACKIMAGE = (9, '用户反馈上传的图片')
    GREETINGPOPUP = (10, '开屏页和首页提醒')
    HOSPITAL = (11, '医院')
    ITEMWIKI = (12, '整形项目(wiki)')
    PRIVATECONVERSATION = (13, '私信对话')
    ORGANIZATIONIMAGE = (14, '机构图片')
    PREOPERATIONIMAGE = (15, '术前图')
    RECOMMENDAPP = (16, '精品应用')
    WEBSLIDE = (17, 'web 轮播图')
    BULLETIN = (18, '公告(医生版)')
    SERVICE = (19, '美购')
    SERVICEACTIVITY = (20, '美购活动')
    SHARE = (21, '分享')
    SLIDE = (22, '轮播图')
    SMALLIMAGE = (23, '首页小图(单排横滑模版)')
    SPECIAL = (24, '专题')
    TAG = (25, 'TAG')
    TOPIC = (26, '帖子')
    TOPICREPLY = (27, '帖子回复')
    TOPICIMAGE = (28, '帖子图片')
    USEREXTRA = (29, '用户')
    POST = (30, '文章(医生版)')
    ARTICLE = (31, '所长推荐/扒扒扒')
    DOCTORREGISTER = (32, '医生注册')
    HOSPITALCAPTURE = (33, '医院(采集)')
    HOMEPOPUP = (34, '首页弹窗(医生)')
    HOMESLIDE = (35, '首页轮播图(医生)')
    ZEUS_STAFF_PROTRAIT = (36, 'ZEUS员工头像')
    CIRCLEBANNER = (37, '圈子圈子横幅')
    CAMPAIGNBANNER = (38, '活动圈子活动banner图')
    CIRCLEICON = (39, '圈子图标')
    ADVERTISE = (40, '广告位')
    INSTALLMENT = (41, '分期')
    BANK = (42, '银行logo')
    AUDIO = (43, '录音音频')
    ZHIBO = (44, '直播')
    # Advertising-system related types
    ADCOVERMAP = (45, '广告封面图')
    TRADEIMAGE = (46, '交易图片')
    SERVICE_COMMENT = (47, '美购评价')
    APOLLO = (48, '分销')
    SERVICE_WATERMARK = (49, '促销标签')
    PLUTUS_QR = (50, '分期二维码')
    PLUTUS_PDF = (51, '金融pdf')
    POLYMER = (52, '聚合页')
    SERVICEHOME = (53, '美购主页')
    CATEGORY_POLYMER = (54, '品类聚合页')
    ICON = (55, '我的页面icon')
    # Generic types that are not tied to a specific feature
    WATERMARK = (98, '带水印')
    NOWATERMARK = (99, '不带水印')
# Domains carry an explicit http/https scheme; when switching a scheme,
# remember to update the test cases as well.
# Each *_platform mapping is keyed by platform name ('qiniu') and points to
# the {domain, bucket} pair used for that platform.
qiniu_no_watermark = dict(domain='https://heras.igengmei.com', bucket='hera')
qiniu_no_watermark_platform = dict(qiniu=qiniu_no_watermark)

qiniu_watermark = dict(domain='https://pic.igengmei.com', bucket='wanmeizhensuo')

qiniu_video = dict(domain='http://video-static.igengmei.com', bucket='video')

qiniu_watermark_platform = dict(qiniu=qiniu_watermark)

qiniu_audio_platform = dict(
    qiniu=dict(domain='http://phonerecord.private.igengmei.com', bucket='phone-record'),
)

qiniu_installment_platform = dict(
    qiniu=dict(domain='http://idcard.private.igengmei.com', bucket='id-card'),
)
# Per-image-type upload settings, keyed by the IMG_TYPE value.
# Each entry holds the key prefix prepended to stored file names and the
# platform mapping (domain + bucket) the type is uploaded to.
image_type = {
    member: {'prefix': prefix, 'platform': platform}
    for member, prefix, platform in (
        (IMG_TYPE.WATERMARK, '', qiniu_watermark_platform),
        (IMG_TYPE.NOWATERMARK, '', qiniu_no_watermark_platform),
        (IMG_TYPE.BANNER, 'banner', qiniu_no_watermark_platform),
        (IMG_TYPE.ICON, '', qiniu_no_watermark_platform),
        (IMG_TYPE.BODYPART, 'bodypart', qiniu_no_watermark_platform),
        (IMG_TYPE.DIARY, 'diary', qiniu_watermark_platform),
        (IMG_TYPE.CAMPAIGNIMAGELINK, 'campaignimagelink', qiniu_no_watermark_platform),
        (IMG_TYPE.CONSULTWIKI, 'consultwiki', qiniu_no_watermark_platform),
        (IMG_TYPE.DOCTOR, 'doctor', qiniu_no_watermark_platform),
        (IMG_TYPE.FEEDBACKCATEGORY, 'feedbackcategory', qiniu_no_watermark_platform),
        (IMG_TYPE.FEEDBACKIMAGE, 'feedbackimage', qiniu_watermark_platform),
        (IMG_TYPE.GREETINGPOPUP, 'greetingpopup', qiniu_no_watermark_platform),
        (IMG_TYPE.HOSPITAL, 'hospital', qiniu_no_watermark_platform),
        (IMG_TYPE.ITEMWIKI, 'itemwiki', qiniu_no_watermark_platform),
        (IMG_TYPE.PRIVATECONVERSATION, 'privateconversation', qiniu_watermark_platform),
        (IMG_TYPE.ORGANIZATIONIMAGE, 'organizationimage', qiniu_watermark_platform),
        (IMG_TYPE.PREOPERATIONIMAGE, 'preoperationimage', qiniu_watermark_platform),
        (IMG_TYPE.RECOMMENDAPP, 'recommendapp', qiniu_no_watermark_platform),
        (IMG_TYPE.WEBSLIDE, 'webslide', qiniu_no_watermark_platform),
        (IMG_TYPE.BULLETIN, 'bulletin', qiniu_no_watermark_platform),
        (IMG_TYPE.SERVICE, 'service', qiniu_no_watermark_platform),
        (IMG_TYPE.SERVICEACTIVITY, 'serviceactivity', qiniu_no_watermark_platform),
        (IMG_TYPE.SHARE, 'share', qiniu_no_watermark_platform),
        (IMG_TYPE.SLIDE, 'slide', qiniu_no_watermark_platform),
        (IMG_TYPE.SMALLIMAGE, 'smallimage', qiniu_no_watermark_platform),
        (IMG_TYPE.SPECIAL, 'special', qiniu_no_watermark_platform),
        (IMG_TYPE.TAG, 'tag', qiniu_no_watermark_platform),
        (IMG_TYPE.TOPIC, 'topic', qiniu_watermark_platform),
        (IMG_TYPE.TOPICREPLY, 'topicreply', qiniu_watermark_platform),
        (IMG_TYPE.TOPICIMAGE, 'topicimage', qiniu_watermark_platform),
        (IMG_TYPE.USEREXTRA, 'userextra', qiniu_watermark_platform),
        # POST shares the 'consultwiki' prefix with CONSULTWIKI.
        (IMG_TYPE.POST, 'consultwiki', qiniu_no_watermark_platform),
        (IMG_TYPE.ARTICLE, 'article', qiniu_no_watermark_platform),
        (IMG_TYPE.DOCTORREGISTER, 'doctorregister', qiniu_no_watermark_platform),
        (IMG_TYPE.HOSPITALCAPTURE, 'hospitalcapture', qiniu_no_watermark_platform),
        (IMG_TYPE.HOMEPOPUP, 'homepopup', qiniu_no_watermark_platform),
        (IMG_TYPE.HOMESLIDE, 'homeslide', qiniu_no_watermark_platform),
        (IMG_TYPE.ZEUS_STAFF_PROTRAIT, 'zeusstaffprotrait', qiniu_no_watermark_platform),
        (IMG_TYPE.ADVERTISE, 'advertise', qiniu_no_watermark_platform),
        (IMG_TYPE.CIRCLEBANNER, 'circlebanner', qiniu_no_watermark_platform),
        (IMG_TYPE.CIRCLEICON, 'circleicon', qiniu_no_watermark_platform),
        (IMG_TYPE.CAMPAIGNBANNER, 'campaignbanner', qiniu_no_watermark_platform),
        (IMG_TYPE.INSTALLMENT, 'installment', qiniu_installment_platform),
        (IMG_TYPE.BANK, 'bank', qiniu_no_watermark_platform),
        (IMG_TYPE.AUDIO, '', qiniu_audio_platform),
        (IMG_TYPE.ZHIBO, '', qiniu_no_watermark_platform),
        # ADCOVERMAP and TRADEIMAGE both store under the 'artemis' prefix.
        (IMG_TYPE.ADCOVERMAP, 'artemis', qiniu_no_watermark_platform),
        (IMG_TYPE.TRADEIMAGE, 'artemis', qiniu_no_watermark_platform),
        (IMG_TYPE.SERVICE_COMMENT, 'service_comment', qiniu_watermark_platform),
        (IMG_TYPE.APOLLO, 'apollo', qiniu_installment_platform),
        (IMG_TYPE.SERVICE_WATERMARK, 'service_watermark', qiniu_no_watermark_platform),
        (IMG_TYPE.PLUTUS_QR, 'plutus_qr', qiniu_no_watermark_platform),
        (IMG_TYPE.PLUTUS_PDF, 'plutus_pdf', qiniu_no_watermark_platform),
        (IMG_TYPE.POLYMER, 'polymer', qiniu_no_watermark_platform),
        (IMG_TYPE.SERVICEHOME, 'service_home', qiniu_no_watermark_platform),
        (IMG_TYPE.CATEGORY_POLYMER, 'category_polymer', qiniu_no_watermark_platform),
    )
}
class GmImageManager(object):
    """Look up the storage settings configured for one image type.

    Settings are read from the module-level ``image_type`` table.
    """

    def __init__(self, img_type):
        """Remember `img_type` after checking it is a known IMG_TYPE value.

        :raises AssertionError: if `img_type` is not a value of IMG_TYPE
        """
        # Raised explicitly instead of using `assert`, which is stripped under
        # `python -O`; AssertionError is kept so callers see the same type.
        if img_type not in IMG_TYPE:
            raise AssertionError('unknown img_type: %r' % (img_type,))
        self.img_type = img_type

    def get_domain(self, platform='qiniu'):
        """Return the domain configured for this type on `platform`."""
        # NOTE: the returned domain includes its http/https prefix.
        return image_type.get(self.img_type)['platform'].get(platform)['domain']

    def get_bucket(self, platform='qiniu'):
        """Return the bucket name configured for this type on `platform`."""
        return image_type.get(self.img_type)['platform'].get(platform)['bucket']

    def get_prefix(self):
        """Return the key prefix for this type (may be an empty string)."""
        return image_type.get(self.img_type)['prefix']
class QiniuTool(object):
    """Classmethod helpers around the Qiniu SDK.

    The Auth, BucketManager and (when available) CdnManager objects are
    created once, when the class body runs, and shared by every method.
    """
    access_key = QINIU_ACCESS_KEY
    secret_key = QINIU_SECRET_KEY
    q = Auth(access_key, secret_key)
    bucket = BucketManager(q)
    # CDN support is optional: when CdnManager cannot be imported or built,
    # `cdn` is None and the refresh helpers become no-ops.
    try:
        from qiniu import CdnManager
        cdn = CdnManager(q)
    except Exception:
        cdn = None

    @classmethod
    def get_pritvate_url(cls, base_url, time=3600):
        """Return a signed download URL for `base_url`, valid `time` seconds.

        The misspelled name is kept because it is part of the public interface.
        """
        return cls.q.private_download_url(base_url, expires=time)

    @classmethod
    def get_private_url(cls, bucket_domain, key, time=3600):
        """Return a signed download URL for `key` under `bucket_domain`.

        A domain without an http/https scheme gets ``http://`` prepended.
        """
        if not bucket_domain.startswith(('http://', 'https://')):
            bucket_domain = 'http://' + bucket_domain
        base_url = '%s/%s' % (bucket_domain, key)
        return cls.get_pritvate_url(base_url, time)

    @classmethod
    def upload(cls, file, save_name, bucket_name):
        """Upload binary data to Qiniu, trying up to five times.

        :param file: the data to upload
        :param save_name: key to store the data under
        :param bucket_name: target bucket
        :return: ``{'file': <stored key>}``
        :raises Exception: when no attempt returns a key
        """
        token = cls.q.upload_token(bucket_name)
        for _ in range(5):
            # put_data returns a pair: a dict such as
            # {"hash": "<Hash string>", "key": "<Key string>"} and a
            # ResponseInfo object.
            response = put_data(token, save_name, file, params=None,
                                mime_type='application/octet-stream', check_crc=False,
                                progress_handler=None)
            if response and response[0] and 'key' in response[0]:
                return {'file': response[0]['key']}
        raise Exception('upload filed')

    @classmethod
    def upload_file(cls, file_path, save_name, bucket_name):
        """Upload a local file to Qiniu, trying up to five times.

        :param file_path: path of the local file
        :param save_name: key to store the file under
        :param bucket_name: target bucket
        :return: ``{'file': <stored key>}``
        :raises Exception: when no attempt returns a key
        """
        token = cls.q.upload_token(bucket_name)
        for _ in range(5):
            # put_file returns the same (dict, ResponseInfo) pair as put_data.
            response = put_file(token, save_name, file_path)
            if response and response[0] and 'key' in response[0]:
                return {'file': response[0]['key']}
        raise Exception('upload file filed')

    @classmethod
    def delete(cls, key, bucket_name):
        """Delete `key`; True on an empty result or status code 612."""
        ret, info = cls.bucket.delete(bucket_name, key)
        return ret == {} or info.status_code == 612

    @classmethod
    def move(cls, old_key, new_key, old_bucket_name, new_bucket_name):
        """Move a key; True on an empty result or status code 614."""
        ret, info = cls.bucket.move(old_bucket_name, old_key, new_bucket_name, new_key)
        return ret == {} or info.status_code == 614

    @classmethod
    def copy(cls, old_key, new_key, old_bucket_name, new_bucket_name):
        """Copy a key; True on an empty result or status code 614."""
        ret, info = cls.bucket.copy(old_bucket_name, old_key, new_bucket_name, new_key)
        return ret == {} or info.status_code == 614

    @classmethod
    def refresh(cls, urls):
        """Refresh the CDN cache for `urls`; always returns True."""
        if not urls or not cls.cdn:
            return True
        cls.cdn.refresh_urls(urls)
        return True

    @classmethod
    def refresh_qiniu_resource_cache(cls, urls):
        """Refresh and then prefetch Qiniu resources; always returns True.

        Notes (from the original author):
            1. `urls` is a list of absolute addresses, at most 100 entries,
               e.g. ["http://video-static.igengmei.com/883c4461af7c11c270afe9e80ae0d967.mp4", ]
            2. Both the node and the CDN-node resources need refreshing; the
               refresh takes about 15 minutes to take effect.
        """
        if not urls or not cls.cdn:
            return True
        cls.cdn.refresh_urls(urls)
        cls.cdn.prefetch_urls(urls)
        return True

    @classmethod
    def prefetch(cls, key, bucket_name):
        """Prefetch `key` into `bucket_name`; True on an empty result."""
        ret, info = cls.bucket.prefetch(bucket_name, key)
        return ret == {}

    @classmethod
    def get_token(cls, bucket_name):
        """Return an upload token for `bucket_name`."""
        return cls.q.upload_token(bucket_name)

    @classmethod
    def set_video_watermark(cls, filename, newfilename, bucket_name, water_mark_url, pipeline):
        """Queue an mp4 transcode of `filename` with an image watermark.

        :return: the persistent operation id
        """
        # NOTE(review): a None result from execute() makes the subscript
        # below raise TypeError -- confirm whether callers rely on that.
        fops = 'avthumb/mp4/wmImage/' + urlsafe_base64_encode(water_mark_url)
        saveas_key = urlsafe_base64_encode(bucket_name + ':' + newfilename)
        fops = fops + '|saveas/' + saveas_key
        pfop = PersistentFop(cls.q, bucket_name, pipeline)
        ret, info = pfop.execute(filename, [fops], 1)
        return ret['persistentId']

    @classmethod
    def video_clipping(cls, filename, new_filename, video_type, bucket_name, water_mark_url,
                       pipeline, start_time, duration, audio_no):
        """Queue a video transcode / clip operation.

        The frame rate is not set here, so the service default applies
        (the original author notes: low frame rate, 1~30).
        NOTE(review): the original comment says the output is always mp4,
        but the format is taken from `video_type`.

        :param filename: key of the source video
        :param new_filename: key of the transcoded video
        :param video_type: target video format
        :param bucket_name: bucket holding both keys
        :param water_mark_url: watermark image address (skipped when falsy)
        :param pipeline: dedicated processing pipeline
        :param start_time: offset in seconds to start from (skipped when falsy)
        :param duration: length to keep (skipped when falsy)
        :param audio_no: audio-removal flag, added whenever it is not None
        :return: the persistent operation id
        """
        fops_list = ['avthumb/{}'.format(video_type)]
        if water_mark_url:
            fops_list.append('wmImage/{}'.format(urlsafe_base64_encode(water_mark_url)))
        if start_time:
            fops_list.append('ss/{}'.format(start_time))
        if duration:
            fops_list.append('t/{}'.format(duration))
        if audio_no is not None:
            fops_list.append('an/{}'.format(audio_no))
        save_key = urlsafe_base64_encode(bucket_name + ':' + new_filename)
        fops = '/'.join(fops_list) + '|saveas/' + save_key
        pfop = PersistentFop(cls.q, bucket_name, pipeline)
        ret, info = pfop.execute(filename, [fops], 1)
        return ret['persistentId']

    @classmethod
    def set_text_watermark(cls,
                           text, image_url, saved_name, bucket_name, font_name="黑体",
                           font_size=240, text_color="FF0000", dissolve=100,
                           gravity="SouthEast", dis_x=10, dis_y=10
                           ):
        """Queue a text-watermark operation on an image.

        Every falsy optional setting is left out of the operation string.

        :return: the persistent operation id, or None when execute() returns
            a falsy result
        """
        fop = ['watermark/2/text/{}'.format(urlsafe_base64_encode(text))]
        if font_name:
            fop.append("font/{}".format(urlsafe_base64_encode(font_name)))
        if font_size:
            fop.append("fontsize/{}".format(font_size))
        if text_color:
            fop.append("fill/{}".format(urlsafe_base64_encode('#' + text_color)))
        if dissolve:
            fop.append("dissolve/{}".format(dissolve))
        if gravity:
            fop.append("gravity/{}".format(gravity))
        if dis_x:
            fop.append("dx/{}".format(dis_x))
        if dis_y:
            fop.append("dy/{}".format(dis_y))
        fops = '/'.join(fop)
        fops = fops + '|saveas/' + urlsafe_base64_encode(bucket_name + ':' + saved_name)
        pfop = PersistentFop(cls.q, bucket_name)
        ret, info = pfop.execute(image_url, [fops], 1)
        if ret:
            return ret['persistentId']
        return None

    @classmethod
    def batch_move_pic(cls, filename_list, bucket_name, new_bucket_name):
        """Batch-copy files to another bucket under the same keys.

        Despite the name, the batch is built with ``build_batch_copy``.

        :param filename_list: keys to copy
        :param bucket_name: source bucket
        :param new_bucket_name: destination bucket
        :return: one entry per submitted key: the key itself when its status
            code is 614 or 200, otherwise None; ``[]`` when nothing was
            submitted or no response info came back
        """
        # An ordered mapping keeps the submitted operations, and therefore
        # the per-operation statuses, aligned with the keys they are paired
        # with below -- also when `filename_list` contains duplicates.
        keys = OrderedDict((filename, filename) for filename in filename_list)
        ops = build_batch_copy(bucket_name, keys, new_bucket_name)
        if not ops:
            return []
        ret, infos = cls.bucket.batch(ops)
        if not infos:
            return []
        statuses = json.loads(infos.text_body)
        return [
            filename if status['code'] in (614, 200) else None
            for filename, status in zip(keys, statuses)
        ]

    @classmethod
    def set_picture_watermark(cls, img_url, water_mark_url, dissolve=100, gravity='NorthEast', dx=40, dy=40):
        """Build the URL of a Qiniu-hosted image with an image watermark.

        https://developer.qiniu.com/dora/manual/1316/image-watermarking-processing-watermark

        :param img_url: address of an image already stored on Qiniu
        :param water_mark_url: address of the watermark source image
        :param dissolve: opacity
        :param gravity: watermark position
        :param dx: horizontal margin
        :param dy: vertical margin
        :return: the composed image address, or "" when either URL is falsy
        """
        if not img_url or not water_mark_url:
            return ""
        return "{img_url}?watermark/1/image/{base64_url}/dissolve/{dissolve}/gravity/{gravity}/dx/{dx}/dy/{dy}".format(
            img_url=img_url,
            base64_url=urlsafe_base64_encode(water_mark_url),
            dissolve=dissolve,
            gravity=gravity,
            dx=dx,
            dy=dy
        )

    @classmethod
    def mkzip(cls, bucket, index_file_name, new_file_name):
        """Queue a multi-file compression operation.

        :param bucket: bucket name
        :param index_file_name: key of the index file listing the addresses
            of the files to compress
        :param new_file_name: key of the archive to create
        :return: the persistent operation id
        """
        saveas_key = urlsafe_base64_encode(bucket + ':' + new_file_name)
        fops = 'mkzip/4/' + '|saveas/' + saveas_key
        pfop = PersistentFop(cls.q, bucket)
        ret, info = pfop.execute(index_file_name, [fops], 1)
        return ret['persistentId']
def gen_rnd_filename(ext=None):
    """Return a random storage name of the form ``YYYY/MM/DD/<10 hex chars>``.

    The random part comes from a UUID4 rather than a hash of the current
    time, so two calls within the same clock tick no longer produce the
    same name.

    :param ext: optional extension, appended as ``.<ext>`` when not None
    :return: the generated name
    """
    import uuid

    date_prefix = datetime.datetime.today().strftime("%Y/%m/%d")
    name = date_prefix + '/' + uuid.uuid4().hex[:10]
    if ext is not None:
        name = name + '.' + ext
    return name
def upload_with_short(image_file, img_type=IMG_TYPE.DIARY, save_name=None, platform='qiniu'):
    """Upload binary image data and return ``(full_url, stored_key)``.

    A random name is generated when `save_name` is falsy, and the image
    type's prefix (if any) is prepended to form the stored key.
    `platform` is accepted but not used by this function.
    """
    # NOTE(review): this function-local import replaces the QiniuTool defined
    # in this module with the one from gm_upload -- confirm that is intended.
    from gm_upload.utils.qiniu_tool import QiniuTool

    manager = GmImageManager(img_type)
    key = save_name or gen_rnd_filename()
    prefix = manager.get_prefix()
    if prefix:
        key = prefix + '/' + key
    QiniuTool.upload(image_file, key, manager.get_bucket())
    full_url = get_full_path(key, manager.get_domain())
    return full_url, key
def upload(image_file, img_type=IMG_TYPE.DIARY, save_name=None, platform='qiniu'):
    """Upload an image and return its full URL."""
    result = upload_with_short(image_file, img_type, save_name, platform)
    return result[0]
def store_picture_and_get_key(picture):
    """Upload `picture` without a watermark and return what `upload` returns.

    The stored name is randomly generated and carries no extension.
    """
    return upload(picture, IMG_TYPE.NOWATERMARK, gen_rnd_filename(None))
def get_domain(domain):
    """Normalize `domain` to carry a scheme and a trailing slash.

    A domain without an http/https prefix gets ``http://`` by default.
    """
    has_scheme = domain.startswith(('http://', 'https://'))
    normalized = domain if has_scheme else 'http://' + domain
    if normalized.endswith('/'):
        return normalized
    return normalized + '/'
def get_full_path(path_name, domain, extra=''):
    """Return the full URL for `path_name` under `domain`, plus `extra`.

    :param path_name: stored key or an absolute URL; it is URL-unquoted
        first when possible
    :param domain: domain to join relative paths onto (normalized through
        ``get_domain``)
    :param extra: suffix appended to the result
    :return: `path_name` itself when it is already an http/https URL, the
        joined URL for a relative path, or just `extra` when `path_name`
        is empty
    """
    domain = get_domain(domain)
    try:
        path_name = parse.unquote(path_name)
    except Exception:
        # Best-effort decoding: keep the raw value (e.g. None) when it cannot
        # be unquoted. `Exception` instead of a bare except, so that
        # KeyboardInterrupt / SystemExit are no longer swallowed here.
        pass
    if not path_name:
        full_path = ''
    elif path_name.startswith(('http://', 'https://')):
        full_path = path_name
    else:
        full_path = parse.urljoin(domain, path_name)
    return full_path + extra
def upload_file(file_path, img_type=IMG_TYPE.NOWATERMARK, save_name=None, platform='qiniu'):
    """Upload a local file and return its full URL."""
    result = upload_file_with_short(file_path, img_type, save_name, platform)
    return result[0]
def upload_file_with_short(file_path, img_type=IMG_TYPE.WATERMARK, save_name=None, platform='qiniu'):
    """Upload a local file and return ``(full_url, stored_key)``.

    A random name is generated when `save_name` is falsy, and the image
    type's prefix (if any) is prepended to form the stored key.
    `platform` is accepted but not used by this function.
    """
    manager = GmImageManager(img_type)
    key = save_name or gen_rnd_filename()
    prefix = manager.get_prefix()
    if prefix:
        key = prefix + '/' + key
    QiniuTool.upload_file(file_path, key, manager.get_bucket())
    full_url = get_full_path(key, manager.get_domain())
    return full_url, key
import os
import re
import time
import pymysql
import requests
import hashlib
import cv2
import execjs
from datetime import datetime
from image_qiniu import upload_file, IMG_TYPE
# Root directory under which UploadImage builds its picture folders.
# NOTE(review): both paths are absolute paths on a developer machine;
# consider making them configurable.
DATA_OS_PATH = '/Users/haowei/workspace/gm/crawler/image'
# Project root; UploadImage uses it to locate the fallback copy of zhihu.js.
PROJECT_PATH = '/Users/haowei/workspace/gm/crawler'
class UploadImage(object):
    """Download pictures listed in the spider database, crop them, upload
    them, and write the new URL back to the row they came from.
    """

    def __init__(self):
        """Open the MySQL connection, set up the picture folders and compile
        the zhihu.js signing script.
        """
        # NOTE(review): database credentials are hard-coded here; consider
        # moving them to configuration.
        HOST = '172.18.51.14'
        PORT = 3306
        USER = 'spider'
        PASSWD = 'Gengmei123'
        DB = 'spider'
        self.conn = pymysql.connect(host=HOST, port=PORT, user=USER,
                                    passwd=PASSWD,
                                    db=DB, charset='utf8')
        self.cur = self.conn.cursor()
        self.ANSWER_PICTURE_PATH = DATA_OS_PATH + '/answer_picture/'
        self.ARTICLE_PICTURE_PATH = DATA_OS_PATH + '/article_picture/'
        self.THOUGHT_PICTURE_PATH = DATA_OS_PATH + '/thought_picture/'
        self.ANSWER_PICTURE_CUT_PATH = DATA_OS_PATH + '/answer_picture_cut/'
        self.ARTICLE_PICTURE_CUT_PATH = DATA_OS_PATH + '/article_picture_cut/'
        self.THOUGHT_PICTURE_CUT_PATH = DATA_OS_PATH + '/thought_picture_cut/'
        self.JS_FILE_PATH = PROJECT_PATH + '/crawler_sys/site_crawler/zhihu.js'
        os.environ["EXECJS_RUNTIME"] = 'Node'
        # Prefer a zhihu.js in the working directory; fall back to the
        # project copy when it cannot be opened or decoded.
        try:
            with open('./zhihu.js', 'r', encoding='utf-8') as f:
                js = f.read()
        except (IOError, OSError, UnicodeDecodeError):
            with open(self.JS_FILE_PATH, 'r', encoding='utf-8') as f:
                js = f.read()
        self.exec_js = execjs.compile(js)

    def get_serach_page_cookies(self):
        """Fetch a profile page and return the cookies it sets as a dict.

        Returns an empty dict when the page could not be fetched.
        """
        url = "https://www.zhihu.com/people/geng-mei-suo-chang/answers?page=1"
        headers = {
            "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
            "accept-encoding": "gzip, deflate, br",
            "accept-language": "zh-CN,zh;q=0.9",
            "cache-control": "max-age=0",
            "cookie": '_SESSIONID=rvJz2mbRjCqJFwvD79ADGb8gpdpRkWXAsdVDqOVALgh; JOID=UVkSBEtHLEUgV_KGUUMd3fULiLJHZglmBHfTo3JnDWADc9KndNqP8XtW9oBWoLo5jsAnAcPG0-JwbPp8rD1JsFI=; osd=W1gdB05NLUojUviHXkAY1_QEi7dNZwZlAX3SrHFiB2EMcNetddWM9HFX-YNTqrs2jcUtAMzF1uhxY_l5pjxGs1c=; _zap=cc1330a6-d6e9-4b25-8232-0b0481e37ea2; _xsrf=4798d610-1db7-489f-8c14-d608c0bbcf08; d_c0="AOCcdS0CNBKPToRkxgVd-8qBKKgTDM9yF4Y=|1605507877"; capsion_ticket="2|1:0|10:1605684439|14:capsion_ticket|44:MzdiN2JlOTZlNWE2NGU5Zjg0MDNiODFlN2ViMjQzNGU=|b84d77e03b5e9447a3740a614d208ca5f3bc1e4fe15fe9b46f1a2a5702da5f99"; SESSIONID=nplCMucg2EXp8xNQAz74jmaLovUS9CG4rVtcFY4jWLT; JOID=V1sUBk8zRBeITtMgLDNzj10QrRI6WzZ_ygKgQU8AB038d6pof-COptdJ1yYpGa7oR9-1bgHql0Hfs4FvUF_YW-A=; osd=Ul4XBkI2QRSIQ9YlLzN-ilgTrR8_XjV_xwelQk8NAkj_d6dteuOOq9JM1CYkHKvrR9KwawLqmkTasIFiVVrbW-0=; Hm_lvt_98beee57fd2ef70ccdd5ca52b9740c49=1605608394,1605684435,1605684439,1605684455; Hm_lpvt_98beee57fd2ef70ccdd5ca52b9740c49=1605684522; KLBRSID=fe0fceb358d671fa6cc33898c8c48b48|1605685245|1605683234',
            "referer": "https://www.zhihu.com/people/geng-mei-suo-chang/answers?page=1",
            "sec-fetch-dest": "document",
            "sec-fetch-mode": "navigate",
            "sec-fetch-site": "same-origin",
            "sec-fetch-user": "?1",
            "upgrade-insecure-requests": "1",
            "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.193 Safari/537.36",
        }
        requests_res = self.retry_get_url(url, headers=headers)
        if requests_res is None:
            # retry_get_url gave up; continue with no fresh cookies instead
            # of failing on `None.cookies`.
            return {}
        return requests_res.cookies.get_dict()

    def headers_handle(self, url):
        """Build the disguised request headers and cookies for `url`.

        The ``x-zse-86`` header is produced by calling function ``b`` of the
        compiled zhihu.js script with the MD5 of a string joined from the
        API version tag, the URL path, the referer and the ``d_c0`` cookie.

        :return: ``(headers, cookies)``
        """
        res_cookies_dict = self.get_serach_page_cookies()
        headers_search = {
            "accept": "*/*",
            "accept-encoding": "gzip, deflate",
            "accept-language": "zh-CN,zh;q=0.9",
            "sec-fetch-dest": "empty",
            "sec-fetch-mode": "cors",
            "sec-fetch-site": "same-origin",
            "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.193 Safari/537.36",
            "x-ab-param": "li_yxzl_new_style_a=1;se_major=0;se_adsrank=4;se_hi_trunc=0;se_v053=1;li_panswer_topic=0;zr_test_aa1=1;pf_noti_entry_num=2;zr_search_sim2=2;zr_slotpaidexp=2;se_zp_boost=0;tp_club_entrance=1;pf_profile2_tab=0;ug_newtag=1;li_answer_card=0;ls_recommend_test=5;qap_labeltype=1;zr_rec_answer_cp=open;se_sug_term=0;tp_topic_tab=0;ge_ge01=5;se_wil_act=0;se_videobox=0;tsp_ad_cardredesign=0;qap_question_visitor= 0;zr_slot_training=2;tp_clubhyb=0;li_ebook_gen_search=2;se_v_v005=0;zw_sameq_sorce=999;ge_ge02=6;se_mobilecard=0;se_auth_src=0;tp_header_style=1;tp_flow_ctr=0;pf_creator_card=1;li_viptab_name=0;zr_intervene=0;se_bert128=1;se_ffzx_jushen1=0;top_v_album=1;se_preset=0;tp_discover=1;ls_fmp4=0;tp_club_top=0;top_universalebook=1;li_svip_cardshow=1;li_paid_answer_exp=0;tp_topic_style=0;zr_art_rec=base;se_colorfultab=1;se_auth_src2=0;tp_club_qa_entrance=1;tp_club__entrance2=1;tsp_hotlist_ui=3;li_svip_tab_search=1;se_entity22=1;tp_meta_card=0;tp_topic_tab_new=0-0-0;tp_zrec=0;top_ebook=0;pf_adjust=1;qap_question_author=0;zr_topic_rpc=0;se_topicfeed=0;tp_club_feed=0;tsp_ioscard2=0;zr_rel_search=base;se_recommend=1;se_usercard=0;tp_club_fdv4=0;tp_m_intro_re_topic=1;pf_foltopic_usernum=0;li_vip_verti_search=0;zr_training_boost=false;se_v054=0;tp_contents=1;soc_feed_intelligent=3;tsp_ios_cardredesign=0;pf_fuceng=1;pf_newguide_vertical=0;ug_follow_topic_1=2;ls_video_commercial=0;li_car_meta=1;se_sug_dnn=0;tp_fenqu_wei=0;li_catalog_card=1;top_quality=0;se_click_v_v=1;se_aa_base=1;se_club_ui=0;se_return_1=0;soc_notification=1;zr_ans_rec=gbrank;zr_search_paid=1;zr_expslotpaid=3;zr_rerank=0;se_college=default;se_whitelist=1;top_root=0;li_yxxq_aut=A1;tsp_adcard2=0;ls_videoad=2;se_col_boost=1;li_edu_page=old;zr_training_first=false;se_t2sug=1;se_vbert3=0;se_merge=0;li_video_section=1;zr_km_answer=open_cvr;zr_sim3=0;se_v_v006=0;tp_dingyue_video=0;li_topics_search=0;se_searchwiki=0;se_guess=0;se_major_v2=0;tp_club_bt=0;tp_sft=a;top_test_4_liguangyi=1",
            "x-api-version": "3.0.91",
            "x-app-za": "OS=Web",
            "x-requested-with": "fetch",
            "x-zse-83": "3_2.0",
            "x-zse-86": None,
            "referer": "https://www.zhihu.com/api/v4/members/geng-mei-suo-chang/answers?page=1",
        }
        cookies_dict = {
            "d_c0": '"AOCcdS0CNBKPToRkxgVd-8qBKKgTDM9yF4Y=|1605507877"',
            "KLBRSID": None
        }
        # Freshly fetched cookies override the defaults above.
        cookies_dict.update(res_cookies_dict)
        f = "+".join(
            ["3_2.0", url.replace("https://www.zhihu.com", ""), headers_search["referer"], cookies_dict["d_c0"]])
        fmd5 = hashlib.new('md5', f.encode()).hexdigest()
        headers_search["x-zse-86"] = "1.0_" + self.exec_js.call("b", fmd5)
        return headers_search, cookies_dict

    @staticmethod
    def retry_get_url(url, retrys=3, proxies=None, timeout=10, **kwargs):
        """GET `url`, retrying up to `retrys` times with a one-second pause.

        `proxies` is accepted but not forwarded to ``requests.get``.

        :return: the response, or None when every attempt raised
        """
        retry_c = 0
        while retry_c < retrys:
            try:
                return requests.get(url, timeout=timeout, **kwargs)
            except Exception as e:
                retry_c += 1
                time.sleep(1)
                print(e)
        print('Failed to get page %s after %d retries, %s'
              % (url, retrys, datetime.now()))
        return None

    @staticmethod
    def upload_image_with_path(path, img_type=IMG_TYPE.TOPICIMAGE):
        """Upload the local file at `path` and return its new URL.

        Used for pictures that do not come from our own site.

        :return: the uploaded URL, or None when the upload raised
        """
        try:
            url = upload_file(file_path=path, img_type=img_type)
        except Exception:
            print('upload ..... error')
            return None
        print('upload ..... ', url)
        return url

    def picture_download_and_cut(self, path, new_path, table, key_id):
        """Download every picture listed in `table`, crop it, upload it and
        store the resulting URL in the row's ``new_url`` column.

        URLs containing ``gif`` are uploaded unchanged; any other picture
        is saved as jpg and cropped to the top nine tenths of its height
        before upload. Pictures that cannot be downloaded or decoded are
        reported and skipped.

        :param path: folder for the downloaded originals
        :param new_path: folder for the cropped pictures
        :param table: table to read from and update (internal name; it is
            interpolated into the SQL text)
        :param key_id: name of the id column selected next to ``url``
        """
        self.cur.execute("""select {}, url from {}""".format(key_id, table))
        rows = self.cur.fetchall()
        self.conn.commit()
        # Only the identifier is interpolated; the values are passed as
        # parameters so quotes inside a URL cannot break the statement.
        update_sql = """UPDATE {} SET new_url = %s WHERE url = %s""".format(table)
        for i, row in enumerate(rows):
            url = row[1]
            is_gif = re.search(r'gif', url) is not None
            headers_search, cookies_dict = self.headers_handle(url)
            r = self.retry_get_url(url, headers=headers_search, cookies=cookies_dict, proxies=0)
            if r is None:
                print('download ..... error', url)
                continue
            if is_gif:
                upload_path = path + 'num' + str(i) + '.gif'
                with open(upload_path, 'wb') as f:
                    f.write(r.content)
            else:
                raw_path = path + 'num' + str(i) + '.jpg'
                with open(raw_path, 'wb') as f:
                    f.write(r.content)
                img = cv2.imread(raw_path)
                if img is None:
                    # cv2.imread returns None for data it cannot decode.
                    print('decode ..... error', url)
                    continue
                high, width = img.shape[:2]
                # Keep the top 90% of the picture.
                cropped = img[0:int(high / 10 * 9), 0:width]
                upload_path = new_path + "num" + str(i) + ".jpg"
                cv2.imwrite(upload_path, cropped)
            new_url = self.upload_image_with_path(upload_path)
            # NOTE(review): str() is kept from the original, so a failed
            # upload stores the text 'None' -- confirm whether NULL or
            # skipping the row would be preferable.
            self.cur.execute(update_sql, (str(new_url), str(url)))
            self.conn.commit()

    def picture_download_and_cut_process(self):
        """Run the download/crop/upload pass for answers, articles and thoughts."""
        self.picture_download_and_cut(self.ANSWER_PICTURE_PATH, self.ANSWER_PICTURE_CUT_PATH,
                                      'zhihu_answer_picture_url', 'answer_id')
        self.picture_download_and_cut(self.ARTICLE_PICTURE_PATH, self.ARTICLE_PICTURE_CUT_PATH,
                                      'zhihu_article_picture_url', 'article_id')
        self.picture_download_and_cut(self.THOUGHT_PICTURE_PATH, self.THOUGHT_PICTURE_CUT_PATH,
                                      'zhihu_thought_picture_url', 'thought_id')
# Script entry point: run the full download/crop/upload pass and print the
# current time before and after it.
if __name__ == '__main__':
    print(datetime.now())
    a = UploadImage()
    a.picture_download_and_cut_process()
    print(datetime.now())
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment