Commit bbc0557a authored by 任婷婷's avatar 任婷婷

age gender dist

parents
Metadata-Version: 1.1
Name: AgeGenderDist
Version: 0.1.0
Summary: A short description of the project
Home-page: https://github.com/rentingting/AgeGenderDist
Author: rentingting
Author-email: Your email address (e.g. you@example.com)
License: BSD
Download-URL: https://github.com/rentingting/AgeGenderDist/tarball/0.1.0
Description: AgeGenderDist
===============================
version number: 0.1.0
author: rentingting
Overview
--------
A short description of the project
Installation / Usage
--------------------
To install use pip:
$ pip install AgeGenderDist
Or clone the repo:
$ git clone https://github.com/rentingting/AgeGenderDist.git
$ python setup.py install
Contributing
------------
TBD
Example
-------
TBD
Platform: UNKNOWN
Classifier: Development Status :: 3 - Alpha
Classifier: Intended Audience :: Developers
Classifier: Programming Language :: Python :: 3
MANIFEST.in
README.md
requirements.txt
setup.cfg
setup.py
AgeGenderDist/__init__.py
AgeGenderDist/face_image.py
AgeGenderDist/face_model.py
AgeGenderDist/face_preprocess.py
AgeGenderDist/gender.py
AgeGenderDist/helper.py
AgeGenderDist/mtcnn_detector.py
AgeGenderDist/noise_sgd.py
AgeGenderDist.egg-info/PKG-INFO
AgeGenderDist.egg-info/SOURCES.txt
AgeGenderDist.egg-info/dependency_links.txt
AgeGenderDist.egg-info/requires.txt
AgeGenderDist.egg-info/top_level.txt
AgeGenderDist/model/model-0000.params
AgeGenderDist/model/model-symbol.json
AgeGenderDist/mtcnn-model/det1-0001.params
AgeGenderDist/mtcnn-model/det1-symbol.json
AgeGenderDist/mtcnn-model/det1.caffemodel
AgeGenderDist/mtcnn-model/det1.prototxt
AgeGenderDist/mtcnn-model/det2-0001.params
AgeGenderDist/mtcnn-model/det2-symbol.json
AgeGenderDist/mtcnn-model/det2.caffemodel
AgeGenderDist/mtcnn-model/det2.prototxt
AgeGenderDist/mtcnn-model/det3-0001.params
AgeGenderDist/mtcnn-model/det3-symbol.json
AgeGenderDist/mtcnn-model/det3.caffemodel
AgeGenderDist/mtcnn-model/det3.prototxt
AgeGenderDist/mtcnn-model/det4-0001.params
AgeGenderDist/mtcnn-model/det4-symbol.json
AgeGenderDist/mtcnn-model/det4.caffemodel
AgeGenderDist/mtcnn-model/det4.prototxt
\ No newline at end of file
mxnet-cu100==1.4.1
scipy==1.2.2
scikit-learn==0.20.3
opencv-python==4.1.0.25
easydict==1.9
scikit-image==0.14.3
import AgeGenderDist.face_model
import argparse
import cv2
import sys
import numpy as np
import datetime
from AgeGenderDist.mtcnn_detector import MtcnnDetector
import os
import mxnet as mx
import math
sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'src', 'common'))
# Command-line options for this script.
# NOTE: parse_args() runs at import time, so importing this module consumes
# sys.argv and publishes the result as the module-level `args` used below.
parser = argparse.ArgumentParser(description='face model test')
# general
# Two comma-separated integers; FaceModel splits this string on ','.
parser.add_argument('--image-size', default='112,112', help='')
# Input location; the __main__ section passes this value to os.listdir().
parser.add_argument('--image', default='Tom_Hanks_54745.png', help='')
# "prefix,epoch" pair; get_model() splits it and loads that checkpoint.
parser.add_argument('--model', default='model/model,0', help='path to load model.')
# GPU id; a negative value selects the CPU context.
parser.add_argument('--gpu', default=0, type=int, help='gpu id')
# Forwarded to MtcnnDetector.detect_face as det_type and used to pick the thresholds.
parser.add_argument('--det', default=0, type=int, help='mtcnn option, 1 means using R+O, 0 means detect from begining')
args = parser.parse_args()
def get_age_gender_dist(img_src):
    """Estimate age, gender and eye-to-eye pixel distance for the first face.

    A fresh MTCNN detector is run on ``img_src`` to obtain the five facial
    landmarks of the first detection; the distance between the first two
    landmark points (the two eyes) is measured in pixels. Age and gender come
    from ``FaceModel``.

    Parameters:
    ----------
        img_src: image array as returned by ``cv2.imread``
    Returns:
    -------
        tuple ``(age, gender, dist)``. When no face can be detected or aligned
        the tuple is ``(None, None, None)`` so callers can still unpack it.
    """
    ctx = mx.gpu(args.gpu) if args.gpu >= 0 else mx.cpu()
    mtcnn_path = os.path.join(os.path.dirname(__file__), 'mtcnn-model')
    # det == 0 runs the full cascade with the regular thresholds; any other
    # value uses the relaxed thresholds.
    if args.det == 0:
        threshold = [0.6, 0.7, 0.8]
    else:
        threshold = [0.0, 0.0, 0.2]
    detector = MtcnnDetector(model_folder=mtcnn_path, ctx=ctx, num_worker=1,
                             accurate_landmark=True, threshold=threshold)

    ret = detector.detect_face(img_src, det_type=args.det)
    if ret is None:
        # Previously this only printed and then crashed on the unpacking below.
        print('ret is none')
        return None, None, None
    bbox, points = ret
    if bbox.shape[0] == 0:
        return None, None, None

    # Landmarks arrive as 10 values (all x, then all y); reshape to 5 (x, y) rows.
    points = points[0, :].reshape((2, 5)).T
    eye_delta = points[1] - points[0]
    dist = math.hypot(eye_delta[0], eye_delta[1])

    # `import AgeGenderDist.face_model` binds only the top-level package name,
    # so the model class has to be reached through the package.
    model = AgeGenderDist.face_model.FaceModel(args)
    img = model.get_input(img_src)
    if img is None:
        return None, None, None
    gender, age = model.get_ga(img)
    return age, gender, dist
def get_gender_age(img):
    """Predict gender and age for the first face found in ``img``.

    Parameters:
    ----------
        img: image array as returned by ``cv2.imread``
    Returns:
    -------
        tuple ``(gender, age)``, or ``(None, None)`` when ``FaceModel.get_input``
        finds no face to align.
    """
    # `import AgeGenderDist.face_model` binds only the top-level package name,
    # so the model class has to be reached through the package.
    model = AgeGenderDist.face_model.FaceModel(args)
    blob = model.get_input(img)
    if blob is None:
        return None, None
    gender, age = model.get_ga(blob)
    return gender, age
if __name__ == '__main__':
    # Run the estimator over every file in the directory given by --image.
    # The files are read from that same directory (it used to be listed from
    # --image but opened from a hard-coded 'testimg' folder).
    image_dir = args.image
    for file in os.listdir(image_dir):
        print('file', file)
        imgdir = os.path.join(image_dir, file)
        img_src = cv2.imread(imgdir)
        if img_src is None:
            # cv2.imread returns None for unreadable / non-image files.
            print('cannot read image', imgdir)
            continue
        # A single call is enough: it already returns age and gender, so the
        # extra get_gender_age() call whose result was overwritten is dropped.
        age, gender, dist = get_age_gender_dist(img_src)
        print(age, gender, dist)
from easydict import EasyDict as edict
import os
import json
import numpy as np
def load_property(data_dir):
    """Read the dataset ``property`` file found in ``data_dir``.

    Each line must hold three comma-separated integers. They are stored as
    ``num_classes`` (first value) and ``image_size`` (second and third value);
    if the file has several lines the last one wins.

    Returns:
    -------
        EasyDict with ``num_classes`` and ``image_size`` (empty if the file
        has no lines).
    """
    prop = edict()
    # Context manager so the file handle is closed deterministically.
    with open(os.path.join(data_dir, 'property')) as prop_file:
        for line in prop_file:
            vec = line.strip().split(',')
            assert len(vec) == 3
            prop.num_classes = int(vec[0])
            prop.image_size = [int(vec[1]), int(vec[2])]
    return prop
def get_dataset_webface(input_dir):
    """Build image records from ``<input_dir>_clean_list.txt``.

    Every line of the list file must contain two whitespace-separated fields:
    a relative image path (backslashes are normalised to '/') and a class name.

    Returns:
    -------
        list of EasyDict records with ``id``, ``classname`` and ``image_path``.
    """
    clean_list_file = input_dir + "_clean_list.txt"
    ret = []
    # Context manager so the list file is closed once it has been read.
    with open(clean_list_file, 'r') as list_file:
        for line in list_file:
            vec = line.strip().split()
            assert len(vec) == 2
            fimage = edict()
            fimage.id = vec[0].replace("\\", '/')
            fimage.classname = vec[1]
            fimage.image_path = os.path.join(input_dir, fimage.id)
            ret.append(fimage)
    return ret
def get_dataset_celeb(input_dir):
    """Build image records from ``<input_dir>_clean_list.txt``.

    Only lines starting with ``./m.`` are used. After dropping the leading
    ``./`` a line must have the form ``<directory>/<file>``; each distinct
    directory gets an integer label in order of first appearance.

    Returns:
    -------
        list of EasyDict records with ``id``, ``classname`` (label as string)
        and ``image_path``.
    """
    clean_list_file = input_dir + "_clean_list.txt"
    ret = []
    dir2label = {}
    # Context manager so the list file is closed once it has been read.
    with open(clean_list_file, 'r') as list_file:
        for line in list_file:
            line = line.strip()
            if not line.startswith('./m.'):
                continue
            line = line[2:]
            vec = line.split('/')
            assert len(vec) == 2
            # New directories receive the next free label; known ones keep theirs.
            label = dir2label.setdefault(vec[0], len(dir2label))
            fimage = edict()
            fimage.id = line
            fimage.classname = str(label)
            fimage.image_path = os.path.join(input_dir, fimage.id)
            ret.append(fimage)
    return ret
def _get_dataset_celeb(input_dir):
    """Build image records from ``<input_dir>_original_list.txt``.

    Every line must contain two whitespace-separated fields: a relative image
    path and a class name.

    Returns:
    -------
        list of EasyDict records with ``id``, ``classname`` and ``image_path``.
    """
    list_file = input_dir + "_original_list.txt"
    ret = []
    # Context manager so the list file is closed once it has been read.
    with open(list_file, 'r') as handle:
        for line in handle:
            vec = line.strip().split()
            assert len(vec) == 2
            fimage = edict()
            fimage.id = vec[0]
            fimage.classname = vec[1]
            fimage.image_path = os.path.join(input_dir, fimage.id)
            ret.append(fimage)
    return ret
def get_dataset_facescrub(input_dir):
    """List images stored as ``<input_dir>/<person>/<image>``.

    Person directories are visited in sorted order and numbered from 0;
    entries of ``input_dir`` that are not directories are skipped.

    Returns:
    -------
        list of EasyDict records with ``id``, ``classname``, ``image_path``,
        ``landmark`` (None) and ``bbox`` (None).
    """
    records = []
    label = 0
    for person_name in sorted(os.listdir(input_dir)):
        person_dir = os.path.join(input_dir, person_name)
        if os.path.isdir(person_dir):
            for file_name in os.listdir(person_dir):
                entry = edict()
                entry.id = os.path.join(person_name, file_name)
                entry.classname = str(label)
                entry.image_path = os.path.join(person_dir, file_name)
                entry.landmark = None
                entry.bbox = None
                records.append(entry)
            # One label per person directory, even if it held no files.
            label += 1
    return records
# Build image records for a directory tree of the form
# <input_dir>/<prefixdir>/<subdir>/<image>.jpg, reading optional annotations
# from a sibling "<image>.jpg.json" file. Returns a list of EasyDict records
# with id, classname, image_path, bbox and landmark.
# NOTE(review): indentation was lost in this listing, so the exact nesting of
# the `assert` and of `label+=1` cannot be confirmed from here.
def get_dataset_megaface(input_dir):
ret = []
label = 0
for prefixdir in os.listdir(input_dir):
_prefixdir = os.path.join(input_dir, prefixdir)
for subdir in os.listdir(_prefixdir):
_subdir = os.path.join(_prefixdir, subdir)
# Non-directory entries at this level are skipped.
if not os.path.isdir(_subdir):
continue
for img in os.listdir(_subdir):
# Accept names ending in '.jpg' except those ending in '.jpg.jpg'.
if not img.endswith('.jpg.jpg') and img.endswith('.jpg'):
fimage = edict()
fimage.id = os.path.join(prefixdir, subdir, img)
fimage.classname = str(label)
fimage.image_path = os.path.join(_subdir, img)
# Annotation file name is the image path with ".json" appended.
json_file = fimage.image_path+".json"
data = None
fimage.bbox = None
fimage.landmark = None
if os.path.exists(json_file):
with open(json_file, 'r') as f:
data = f.read()
data = json.loads(data)
# data is still None here when no annotation file was loaded.
assert data is not None
if 'bounding_box' in data:
# bbox is stored as [x, y, x+width, y+height] in float32.
fimage.bbox = np.zeros( (4,), dtype=np.float32 )
bb = data['bounding_box']
fimage.bbox[0] = bb['x']
fimage.bbox[1] = bb['y']
fimage.bbox[2] = bb['x']+bb['width']
fimage.bbox[3] = bb['y']+bb['height']
#print('bb')
if 'landmarks' in data:
landmarks = data['landmarks']
# Landmarks are only kept when keys '0', '1' and '2' are all present;
# rows are filled from keys '1', '0', '2' in that order, as (x, y).
if '1' in landmarks and '0' in landmarks and '2' in landmarks:
fimage.landmark = np.zeros( (3,2), dtype=np.float32 )
fimage.landmark[0][0] = landmarks['1']['x']
fimage.landmark[0][1] = landmarks['1']['y']
fimage.landmark[1][0] = landmarks['0']['x']
fimage.landmark[1][1] = landmarks['0']['y']
fimage.landmark[2][0] = landmarks['2']['x']
fimage.landmark[2][1] = landmarks['2']['y']
#print('lm')
ret.append(fimage)
# presumably incremented once per <subdir> — TODO confirm against the original file
label+=1
return ret
# Build image records for a directory tree of the form
# <input_dir>/<subdir>/<image>.JPG, reading optional annotations from a
# sibling "<image>.JPG.json" file. Returns a list of EasyDict records with
# id, classname, image_path, bbox and landmark.
# NOTE(review): indentation was lost in this listing, so the exact nesting of
# the `assert` and of `label+=1` cannot be confirmed from here.
def get_dataset_fgnet(input_dir):
ret = []
label = 0
for subdir in os.listdir(input_dir):
_subdir = os.path.join(input_dir, subdir)
# Non-directory entries are skipped.
if not os.path.isdir(_subdir):
continue
for img in os.listdir(_subdir):
# Only upper-case '.JPG' names are accepted.
if img.endswith('.JPG'):
fimage = edict()
# Unlike the other loaders, id is the full joined path (same as image_path).
fimage.id = os.path.join(_subdir, img)
fimage.classname = str(label)
fimage.image_path = os.path.join(_subdir, img)
# Annotation file name is the image path with ".json" appended.
json_file = fimage.image_path+".json"
data = None
fimage.bbox = None
fimage.landmark = None
if os.path.exists(json_file):
with open(json_file, 'r') as f:
data = f.read()
data = json.loads(data)
# data is still None here when no annotation file was loaded.
assert data is not None
if 'bounding_box' in data:
# bbox is stored as [x, y, x+width, y+height] in float32.
fimage.bbox = np.zeros( (4,), dtype=np.float32 )
bb = data['bounding_box']
fimage.bbox[0] = bb['x']
fimage.bbox[1] = bb['y']
fimage.bbox[2] = bb['x']+bb['width']
fimage.bbox[3] = bb['y']+bb['height']
#print('bb')
if 'landmarks' in data:
landmarks = data['landmarks']
# Landmarks are only kept when keys '0', '1' and '2' are all present;
# rows are filled from keys '1', '0', '2' in that order, as (x, y).
if '1' in landmarks and '0' in landmarks and '2' in landmarks:
fimage.landmark = np.zeros( (3,2), dtype=np.float32 )
fimage.landmark[0][0] = landmarks['1']['x']
fimage.landmark[0][1] = landmarks['1']['y']
fimage.landmark[1][0] = landmarks['0']['x']
fimage.landmark[1][1] = landmarks['0']['y']
fimage.landmark[2][0] = landmarks['2']['x']
fimage.landmark[2][1] = landmarks['2']['y']
#print('lm')
#fimage.landmark = None
ret.append(fimage)
# presumably incremented once per <subdir> — TODO confirm against the original file
label+=1
return ret
def get_dataset_ytf(input_dir):
    """List images stored as ``<input_dir>/<person>/<subdir>/<image>``.

    Person directories are visited in sorted order and numbered from 0; all
    images below one person share that person's label. Non-directory entries
    at either directory level are skipped.

    Returns:
    -------
        list of EasyDict records with ``id``, ``classname``, ``image_path``,
        ``bbox`` (None) and ``landmark`` (None).
    """
    records = []
    label = 0
    for person_name in sorted(os.listdir(input_dir)):
        person_dir = os.path.join(input_dir, person_name)
        if not os.path.isdir(person_dir):
            continue
        for inner_name in os.listdir(person_dir):
            inner_dir = os.path.join(person_dir, inner_name)
            if not os.path.isdir(inner_dir):
                continue
            for img in os.listdir(inner_dir):
                full_path = os.path.join(inner_dir, img)
                entry = edict()
                # id and image_path are the same joined path here.
                entry.id = full_path
                entry.classname = str(label)
                entry.image_path = full_path
                entry.bbox = None
                entry.landmark = None
                records.append(entry)
        label += 1
    return records
def get_dataset_clfw(input_dir):
    """List every entry of the flat directory ``input_dir`` as an image record.

    All records carry the class name ``'0'``.

    Returns:
    -------
        list of EasyDict records with ``id``, ``classname``, ``image_path``,
        ``bbox`` (None) and ``landmark`` (None).
    """
    def _make_record(file_name):
        entry = edict()
        entry.id = file_name
        entry.classname = str(0)
        entry.image_path = os.path.join(input_dir, file_name)
        entry.bbox = None
        entry.landmark = None
        return entry

    return [_make_record(file_name) for file_name in os.listdir(input_dir)]
def get_dataset_common(input_dir, min_images = 1):
    """List images stored as ``<input_dir>/<person>/<image>``.

    Person directories are visited in sorted order. A person is kept, and
    receives the next label, only when the directory holds at least
    ``min_images`` entries; skipped persons do not consume a label.

    Returns:
    -------
        list of EasyDict records with ``id``, ``classname``, ``image_path``,
        ``bbox`` (None) and ``landmark`` (None).
    """
    records = []
    label = 0
    for person_name in sorted(os.listdir(input_dir)):
        person_dir = os.path.join(input_dir, person_name)
        if not os.path.isdir(person_dir):
            continue
        person_records = []
        for img in os.listdir(person_dir):
            entry = edict()
            entry.id = os.path.join(person_name, img)
            entry.classname = str(label)
            entry.image_path = os.path.join(person_dir, img)
            entry.bbox = None
            entry.landmark = None
            person_records.append(entry)
        if len(person_records) < min_images:
            continue
        records.extend(person_records)
        label += 1
    return records
def get_dataset(name, input_dir):
    """Dispatch to the dataset loader registered for ``name``.

    'webface', 'lfw' and 'vgg' share the generic per-person directory loader.

    Returns:
    -------
        the loader's list of image records, or None for an unknown ``name``.
    """
    if name in ('webface', 'lfw', 'vgg'):
        return get_dataset_common(input_dir)
    elif name == 'celeb':
        return get_dataset_celeb(input_dir)
    elif name == 'facescrub':
        return get_dataset_facescrub(input_dir)
    elif name == 'megaface':
        return get_dataset_megaface(input_dir)
    elif name == 'fgnet':
        return get_dataset_fgnet(input_dir)
    elif name == 'ytf':
        return get_dataset_ytf(input_dir)
    elif name == 'clfw':
        return get_dataset_clfw(input_dir)
    else:
        return None
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from scipy import misc
import sys
import os
import argparse
#import tensorflow as tf
import numpy as np
import mxnet as mx
import random
import cv2
import sklearn
from sklearn.decomposition import PCA
from time import sleep
from easydict import EasyDict as edict
from AgeGenderDist.mtcnn_detector import MtcnnDetector
sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'src', 'common'))
import AgeGenderDist.face_image
import AgeGenderDist.face_preprocess
def do_flip(data):
    """Mirror every ``data[i]`` slice left-to-right, in place.

    For each index along the first axis the slice ``data[i, :, :]`` has its
    second axis reversed and is written back into ``data``. Nothing is
    returned.
    """
    for plane_idx in range(data.shape[0]):
        plane = data[plane_idx, :, :]
        # Reversing the second axis of the slice is what np.fliplr does.
        data[plane_idx, :, :] = plane[:, ::-1]
def get_model(ctx, image_size, model_str, layer):
    """Load an MXNet checkpoint and bind it for single-image inference.

    Parameters:
    ----------
        ctx: MXNet context the module is created on
        image_size: two-item sequence giving the last two input dimensions
        model_str: ``"prefix,epoch"`` string identifying the checkpoint
        layer: name of the internal layer whose ``<layer>_output`` becomes
            the module output
    Returns:
    -------
        bound ``mx.mod.Module`` with the checkpoint parameters set
    """
    parts = model_str.split(',')
    assert len(parts)==2
    prefix, epoch = parts[0], int(parts[1])
    print('loading',prefix, epoch)
    symbol, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
    # Cut the network at the requested internal layer.
    truncated = symbol.get_internals()[layer+'_output']
    module = mx.mod.Module(symbol=truncated, context=ctx, label_names = None)
    input_shape = (1, 3, image_size[0], image_size[1])
    module.bind(data_shapes=[('data', input_shape)])
    module.set_params(arg_params, aux_params)
    return module
class FaceModel:
    """MTCNN face detector plus an MXNet gender/age network.

    ``args`` must provide ``gpu``, ``image_size`` ("a,b" string), ``model``
    ("prefix,epoch" string, may be empty) and ``det``.
    """

    def __init__(self, args):
        """Load the gender/age network (if ``args.model`` is set) and the detector."""
        self.args = args
        # A negative GPU id selects the CPU context.
        ctx = mx.gpu(args.gpu) if args.gpu >= 0 else mx.cpu()
        _vec = args.image_size.split(',')
        assert len(_vec) == 2
        image_size = (int(_vec[0]), int(_vec[1]))
        self.model = None
        if len(args.model) > 0:
            self.model = get_model(ctx, image_size, args.model, 'fc1')
        self.det_minsize = 50
        self.det_threshold = [0.6, 0.7, 0.8]
        self.image_size = image_size
        mtcnn_path = os.path.join(os.path.dirname(__file__), 'mtcnn-model')
        # det == 0 uses the regular thresholds, any other value the relaxed ones.
        threshold = self.det_threshold if args.det == 0 else [0.0, 0.0, 0.2]
        self.detector = MtcnnDetector(model_folder=mtcnn_path, ctx=ctx,
                                      num_worker=1, accurate_landmark=True,
                                      threshold=threshold)

    def get_input(self, face_img):
        """Detect, align and pack the first face of ``face_img``.

        Returns:
        -------
            ``mx.io.DataBatch`` holding one aligned RGB face in
            (1, channel, row, column) layout, or None when no face is found.
        """
        ret = self.detector.detect_face(face_img, det_type=self.args.det)
        if ret is None:
            return None
        bbox, points = ret
        if bbox.shape[0] == 0:
            return None
        bbox = bbox[0, 0:4]
        # Landmarks arrive as 10 values (all x, then all y); reshape to 5 (x, y) rows.
        points = points[0, :].reshape((2, 5)).T
        # `import AgeGenderDist.face_preprocess` binds only the package name, so
        # the function is reached through the package. The crop size follows
        # the size the network was bound with instead of a fixed '112,112'.
        nimg = AgeGenderDist.face_preprocess.preprocess(
            face_img, bbox, points, image_size='%d,%d' % self.image_size)
        nimg = cv2.cvtColor(nimg, cv2.COLOR_BGR2RGB)
        aligned = np.transpose(nimg, (2, 0, 1))
        input_blob = np.expand_dims(aligned, axis=0)
        data = mx.nd.array(input_blob)
        return mx.io.DataBatch(data=(data,))

    def get_ga(self, data):
        """Run the network on ``data`` and decode gender and age.

        The first two outputs are gender scores (the arg-max index is
        returned). The next 200 outputs are read as 100 pairs; the age is the
        number of pairs whose second value is the larger one.

        Returns:
        -------
            tuple ``(gender, age)``
        """
        self.model.forward(data, is_train=False)
        ret = self.model.get_outputs()[0].asnumpy()
        gender = np.argmax(ret[:, 0:2].flatten())
        age_pairs = ret[:, 2:202].reshape((100, 2))
        age = int(np.sum(np.argmax(age_pairs, axis=1)))
        return gender, age
import cv2
import numpy as np
from skimage import transform as trans
def parse_lst_line(line):
    """Parse one tab-separated line of an image list file.

    Field layout: ``aligned``, ``image_path``, ``label``, then optionally four
    integer bounding-box values (fields 3-6) and, after those, optionally ten
    float landmark values (fields 7-16).

    Returns:
    -------
        tuple ``(image_path, label, bbox, landmark, aligned)`` where ``bbox``
        is an int32 array of 4 values or None and ``landmark`` is a (5, 2)
        array or None.
    """
    vec = line.strip().split("\t")
    assert len(vec) >= 3
    aligned = int(vec[0])
    image_path = vec[1]
    label = int(vec[2])
    bbox = None
    landmark = None
    if len(vec) > 3:
        # range() instead of the Python-2-only xrange(); explicit indexing
        # keeps the IndexError for truncated lines.
        bbox = np.array([int(vec[i]) for i in range(3, 7)], dtype=np.int32)
        if len(vec) > 7:
            values = [float(vec[i]) for i in range(7, 17)]
            # Ten values are read as two rows of five, then transposed to (5, 2).
            landmark = np.array(values).reshape((2, 5)).T
    return image_path, label, bbox, landmark, aligned
def read_image(img_path, **kwargs):
    """Read an image file with OpenCV.

    Keyword arguments:
        mode: 'gray' loads a single-channel image; 'rgb' (default) loads a
            colour image and reverses the channel order delivered by OpenCV;
            any other value keeps OpenCV's channel order.
        layout: 'HWC' (default) keeps the array as loaded; 'CHW' moves the
            channel axis first (colour images only).

    Returns:
    -------
        the image array, or None when OpenCV cannot read the file.
    """
    mode = kwargs.get('mode', 'rgb')
    layout = kwargs.get('layout', 'HWC')
    if mode == 'gray':
        # IMREAD_* replaces the CV_LOAD_IMAGE_* names that newer OpenCV
        # releases no longer provide.
        return cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    img = cv2.imread(img_path, cv2.IMREAD_COLOR)
    if img is None:
        # Same result as the grayscale path for unreadable files, instead of
        # failing on the slicing below.
        return None
    if mode == 'rgb':
        img = img[..., ::-1]
    if layout == 'CHW':
        img = np.transpose(img, (2, 0, 1))
    return img
def preprocess(img, bbox=None, landmark=None, **kwargs):
    """Crop or align a face image.

    Parameters:
    ----------
        img: image array, or a path string that is loaded with ``read_image``
        bbox: optional box ``[x1, y1, x2, y2]`` used for a margin crop
        landmark: optional (5, 2) landmark array; when given, the face is
            warped onto a fixed 5-point template
        image_size (keyword): "rows,cols" or a single number as a string;
            rows must be 112 and cols must be 112 or 96
        margin (keyword): total margin added around the box, default 44
    Returns:
    -------
        the warped face when ``landmark`` is given, otherwise the cropped
        (and, if ``image_size`` is set, resized) image
    """
    if isinstance(img, str):
        img = read_image(img, **kwargs)
    M = None
    image_size = []
    str_image_size = kwargs.get('image_size', '')
    if len(str_image_size) > 0:
        image_size = [int(x) for x in str_image_size.split(',')]
        if len(image_size) == 1:
            image_size = [image_size[0], image_size[0]]
        assert len(image_size) == 2
        assert image_size[0] == 112
        # The width check used to test image_size[0] again, which made it
        # always true; the template below only supports widths 112 and 96.
        assert image_size[1] == 112 or image_size[1] == 96
    if landmark is not None:
        assert len(image_size) == 2
        # Reference landmark positions for a 112 x 96 crop.
        src = np.array([
            [30.2946, 51.6963],
            [65.5318, 51.5014],
            [48.0252, 71.7366],
            [33.5493, 92.3655],
            [62.7299, 92.2041]], dtype=np.float32)
        if image_size[1] == 112:
            # Shift the template horizontally for the wider 112-column crop.
            src[:, 0] += 8.0
        dst = landmark.astype(np.float32)
        tform = trans.SimilarityTransform()
        tform.estimate(dst, src)
        M = tform.params[0:2, :]

    if M is None:
        if bbox is None:
            # No box either: fall back to a centre crop that trims 6.25 % from
            # every side before the margin is added back.
            det = np.zeros(4, dtype=np.int32)
            det[0] = int(img.shape[1] * 0.0625)
            det[1] = int(img.shape[0] * 0.0625)
            det[2] = img.shape[1] - det[0]
            det[3] = img.shape[0] - det[1]
        else:
            det = bbox
        margin = kwargs.get('margin', 44)
        # Grow the box by half the margin on each side, clipped to the image.
        bb = np.zeros(4, dtype=np.int32)
        bb[0] = np.maximum(det[0] - margin / 2, 0)
        bb[1] = np.maximum(det[1] - margin / 2, 0)
        bb[2] = np.minimum(det[2] + margin / 2, img.shape[1])
        bb[3] = np.minimum(det[3] + margin / 2, img.shape[0])
        ret = img[bb[1]:bb[3], bb[0]:bb[2], :]
        if len(image_size) > 0:
            ret = cv2.resize(ret, (image_size[1], image_size[0]))
        return ret

    # Align using the landmark-derived similarity transform.
    assert len(image_size) == 2
    warped = cv2.warpAffine(img, M, (image_size[1], image_size[0]), borderValue=0.0)
    return warped
import AgeGenderDist.face_model
import argparse
import cv2
import sys
import numpy as np
import datetime
from AgeGenderDist.mtcnn_detector import MtcnnDetector
import os
import mxnet as mx
import math
sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'src', 'common'))
# Command-line options for this script.
# NOTE: parse_args() runs at import time, so importing this module consumes
# sys.argv and publishes the result as the module-level `args` used below.
parser = argparse.ArgumentParser(description='face model test')
# general
# Two comma-separated integers; FaceModel splits this string on ','.
parser.add_argument('--image-size', default='112,112', help='')
# Input location; the __main__ section passes this value to os.listdir().
parser.add_argument('--image', default='Tom_Hanks_54745.png', help='')
# "prefix,epoch" pair; get_model() splits it and loads that checkpoint.
parser.add_argument('--model', default='model/model,0', help='path to load model.')
# GPU id; a negative value selects the CPU context.
parser.add_argument('--gpu', default=0, type=int, help='gpu id')
# Forwarded to MtcnnDetector.detect_face as det_type and used to pick the thresholds.
parser.add_argument('--det', default=0, type=int, help='mtcnn option, 1 means using R+O, 0 means detect from begining')
args = parser.parse_args()
def get_age_gender_dist(img_src):
    """Estimate age, gender and eye-to-eye pixel distance for the first face.

    A fresh MTCNN detector is run on ``img_src`` to obtain the five facial
    landmarks of the first detection; the distance between the first two
    landmark points (the two eyes) is measured in pixels. Age and gender come
    from ``FaceModel``.

    Parameters:
    ----------
        img_src: image array as returned by ``cv2.imread``
    Returns:
    -------
        tuple ``(age, gender, dist)``. When no face can be detected or aligned
        the tuple is ``(None, None, None)`` so callers can still unpack it.
    """
    ctx = mx.gpu(args.gpu) if args.gpu >= 0 else mx.cpu()
    mtcnn_path = os.path.join(os.path.dirname(__file__), 'mtcnn-model')
    # det == 0 runs the full cascade with the regular thresholds; any other
    # value uses the relaxed thresholds.
    if args.det == 0:
        threshold = [0.6, 0.7, 0.8]
    else:
        threshold = [0.0, 0.0, 0.2]
    detector = MtcnnDetector(model_folder=mtcnn_path, ctx=ctx, num_worker=1,
                             accurate_landmark=True, threshold=threshold)

    ret = detector.detect_face(img_src, det_type=args.det)
    if ret is None:
        # Previously this only printed and then crashed on the unpacking below.
        print('ret is none')
        return None, None, None
    bbox, points = ret
    if bbox.shape[0] == 0:
        return None, None, None

    # Landmarks arrive as 10 values (all x, then all y); reshape to 5 (x, y) rows.
    points = points[0, :].reshape((2, 5)).T
    eye_delta = points[1] - points[0]
    dist = math.hypot(eye_delta[0], eye_delta[1])

    # `import AgeGenderDist.face_model` binds only the top-level package name,
    # so the model class has to be reached through the package.
    model = AgeGenderDist.face_model.FaceModel(args)
    img = model.get_input(img_src)
    if img is None:
        return None, None, None
    gender, age = model.get_ga(img)
    return age, gender, dist
def get_gender_age(img):
    """Predict gender and age for the first face found in ``img``.

    Parameters:
    ----------
        img: image array as returned by ``cv2.imread``
    Returns:
    -------
        tuple ``(gender, age)``, or ``(None, None)`` when ``FaceModel.get_input``
        finds no face to align.
    """
    # `import AgeGenderDist.face_model` binds only the top-level package name,
    # so the model class has to be reached through the package.
    model = AgeGenderDist.face_model.FaceModel(args)
    blob = model.get_input(img)
    if blob is None:
        return None, None
    gender, age = model.get_ga(blob)
    return gender, age
if __name__ == '__main__':
    # Run the estimator over every file in the directory given by --image.
    # The files are read from that same directory (it used to be listed from
    # --image but opened from a hard-coded 'testimg' folder).
    image_dir = args.image
    for file in os.listdir(image_dir):
        print('file', file)
        imgdir = os.path.join(image_dir, file)
        img_src = cv2.imread(imgdir)
        if img_src is None:
            # cv2.imread returns None for unreadable / non-image files.
            print('cannot read image', imgdir)
            continue
        # A single call is enough: it already returns age and gender, so the
        # extra get_gender_age() call whose result was overwritten is dropped.
        age, gender, dist = get_age_gender_dist(img_src)
        print(age, gender, dist)
# coding: utf-8
# YuanYang
import math
import cv2
import numpy as np
def nms(boxes, overlap_threshold, mode='Union'):
    """
    greedy non-maximum suppression
    Parameters:
    ----------
        boxes: numpy array n x 5
            rows of x1, y1, x2, y2, score
        overlap_threshold: float number
            boxes overlapping a kept box by more than this are dropped
        mode: string
            how to compute the overlap ratio: 'Min' divides the intersection
            by the smaller area, anything else by the union
    Returns:
    -------
        list of indexes of the selected boxes, highest score first
    """
    if len(boxes) == 0:
        return []

    # integer boxes would make the ratio below integer arithmetic
    if boxes.dtype.kind == "i":
        boxes = boxes.astype("float")

    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]
    score = boxes[:, 4]
    area = (x2 - x1 + 1) * (y2 - y1 + 1)

    # ascending by score, so the best remaining candidate is always last
    order = np.argsort(score)
    keep = []
    while len(order) > 0:
        best = order[-1]
        rest = order[:-1]
        keep.append(best)

        # intersection of the best box with every remaining candidate
        inter_w = np.maximum(0, np.minimum(x2[best], x2[rest]) - np.maximum(x1[best], x1[rest]) + 1)
        inter_h = np.maximum(0, np.minimum(y2[best], y2[rest]) - np.maximum(y1[best], y1[rest]) + 1)
        inter = inter_w * inter_h

        if mode == 'Min':
            overlap = inter / np.minimum(area[best], area[rest])
        else:
            overlap = inter / (area[best] + area[rest] - inter)

        # drop the best box itself and everything overlapping it too much
        order = rest[~(overlap > overlap_threshold)]
    return keep
def adjust_input(in_data):
    """
    adjust the input from (h, w, c) to (1, c, h, w) for network input
    Parameters:
    ----------
        in_data: numpy array of shape (h, w, c)
            input data
    Returns:
    -------
        out_data: float32 numpy array of shape (1, c, h, w)
            transposed array with values mapped through (x - 127.5) * 0.0078125
    """
    # Convert only when needed; dtypes are compared by value inside astype
    # rather than with the identity test (`is not`) used before.
    out_data = in_data.astype(np.float32, copy=False)
    out_data = out_data.transpose((2, 0, 1))
    out_data = np.expand_dims(out_data, 0)
    out_data = (out_data - 127.5) * 0.0078125
    return out_data
def generate_bbox(map, reg, scale, threshold):
    """
    generate bbox from feature map
    Parameters:
    ----------
        map: numpy array, indexed as map[row, col]
            detect score for each position
        reg: numpy array, indexed as reg[0, i, row, col] with i in 0..3
            bbox regression values for each position
        scale: float number
            scale of this detection
        threshold: float number
            detect threshold
    Returns:
    -------
        empty array when no score exceeds the threshold, otherwise an array
        with one row per hit: x1, y1, x2, y2, score, and the four reg values
    """
    stride = 2
    cellsize = 12

    rows, cols = np.where(map > threshold)
    # find nothing
    if rows.size == 0:
        return np.array([])

    offsets = np.array([reg[0, channel, rows, cols] for channel in range(4)])
    score = map[rows, cols]

    # map each feature-map cell back to a cellsize window in image coordinates
    left = np.round((stride * cols + 1) / scale)
    top = np.round((stride * rows + 1) / scale)
    right = np.round((stride * cols + 1 + cellsize) / scale)
    bottom = np.round((stride * rows + 1 + cellsize) / scale)

    stacked = np.vstack([left, top, right, bottom, score, offsets])
    return stacked.T
def detect_first_stage(img, net, scale, threshold):
    """
    run the first-stage network on one rescaled copy of the image
    Parameters:
    ----------
        img: numpy array with three dimensions (rows, cols, channels)
            input image
        net: object with a predict() method
            first-stage network
        scale: float number
            factor the image is resized by before prediction
        threshold: float number
            detect threshold passed to generate_bbox
    Returns:
    -------
        boxes kept after non-maximum suppression, or None when no position
        passes the threshold
    """
    rows, cols, _channels = img.shape
    scaled_rows = int(math.ceil(rows * scale))
    scaled_cols = int(math.ceil(cols * scale))
    resized = cv2.resize(img, (scaled_cols, scaled_rows))

    # adjust for the network input
    output = net.predict(adjust_input(resized))
    candidates = generate_bbox(output[1][0,1,:,:], output[0], scale, threshold)
    if candidates.size == 0:
        return None

    # nms on the coordinate and score columns only
    keep = nms(candidates[:,0:5], 0.5, mode='Union')
    return candidates[keep]
def detect_first_stage_warpper( args ):
    """Call detect_first_stage with its arguments packed in one sequence.

    ``args`` is unpacked positionally, which lets callers that can only pass
    a single object forward the full argument list.
    """
    boxes = detect_first_stage(*args)
    return boxes
This diff is collapsed.
{
"nodes": [
{
"op": "null",
"param": {},
"name": "data",
"inputs": [],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv1_weight",
"inputs": [],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv1_bias",
"inputs": [],
"backward_source_id": -1
},
{
"op": "Convolution",
"param": {
"cudnn_off": "False",
"cudnn_tune": "off",
"dilate": "(1,1)",
"kernel": "(3,3)",
"no_bias": "False",
"num_filter": "10",
"num_group": "1",
"pad": "(0,0)",
"stride": "(1,1)",
"workspace": "1024"
},
"name": "conv1",
"inputs": [[0, 0], [1, 0], [2, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "prelu1_gamma",
"inputs": [],
"backward_source_id": -1
},
{
"op": "LeakyReLU",
"param": {
"act_type": "prelu",
"lower_bound": "0.125",
"slope": "0.25",
"upper_bound": "0.334"
},
"name": "prelu1",
"inputs": [[3, 0], [4, 0]],
"backward_source_id": -1
},
{
"op": "Pooling",
"param": {
"global_pool": "False",
"kernel": "(2,2)",
"pad": "(0,0)",
"pool_type": "max",
"pooling_convention": "full",
"stride": "(2,2)"
},
"name": "pool1",
"inputs": [[5, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv2_weight",
"inputs": [],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv2_bias",
"inputs": [],
"backward_source_id": -1
},
{
"op": "Convolution",
"param": {
"cudnn_off": "False",
"cudnn_tune": "off",
"dilate": "(1,1)",
"kernel": "(3,3)",
"no_bias": "False",
"num_filter": "16",
"num_group": "1",
"pad": "(0,0)",
"stride": "(1,1)",
"workspace": "1024"
},
"name": "conv2",
"inputs": [[6, 0], [7, 0], [8, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "prelu2_gamma",
"inputs": [],
"backward_source_id": -1
},
{
"op": "LeakyReLU",
"param": {
"act_type": "prelu",
"lower_bound": "0.125",
"slope": "0.25",
"upper_bound": "0.334"
},
"name": "prelu2",
"inputs": [[9, 0], [10, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv3_weight",
"inputs": [],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv3_bias",
"inputs": [],
"backward_source_id": -1
},
{
"op": "Convolution",
"param": {
"cudnn_off": "False",
"cudnn_tune": "off",
"dilate": "(1,1)",
"kernel": "(3,3)",
"no_bias": "False",
"num_filter": "32",
"num_group": "1",
"pad": "(0,0)",
"stride": "(1,1)",
"workspace": "1024"
},
"name": "conv3",
"inputs": [[11, 0], [12, 0], [13, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "prelu3_gamma",
"inputs": [],
"backward_source_id": -1
},
{
"op": "LeakyReLU",
"param": {
"act_type": "prelu",
"lower_bound": "0.125",
"slope": "0.25",
"upper_bound": "0.334"
},
"name": "prelu3",
"inputs": [[14, 0], [15, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv4_2_weight",
"inputs": [],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv4_2_bias",
"inputs": [],
"backward_source_id": -1
},
{
"op": "Convolution",
"param": {
"cudnn_off": "False",
"cudnn_tune": "off",
"dilate": "(1,1)",
"kernel": "(1,1)",
"no_bias": "False",
"num_filter": "4",
"num_group": "1",
"pad": "(0,0)",
"stride": "(1,1)",
"workspace": "1024"
},
"name": "conv4_2",
"inputs": [[16, 0], [17, 0], [18, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv4_1_weight",
"inputs": [],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv4_1_bias",
"inputs": [],
"backward_source_id": -1
},
{
"op": "Convolution",
"param": {
"cudnn_off": "False",
"cudnn_tune": "off",
"dilate": "(1,1)",
"kernel": "(1,1)",
"no_bias": "False",
"num_filter": "2",
"num_group": "1",
"pad": "(0,0)",
"stride": "(1,1)",
"workspace": "1024"
},
"name": "conv4_1",
"inputs": [[16, 0], [20, 0], [21, 0]],
"backward_source_id": -1
},
{
"op": "SoftmaxActivation",
"param": {"mode": "channel"},
"name": "prob1",
"inputs": [[22, 0]],
"backward_source_id": -1
}
],
"arg_nodes": [
0,
1,
2,
4,
7,
8,
10,
12,
13,
15,
17,
18,
20,
21
],
"heads": [[19, 0], [23, 0]]
}
\ No newline at end of file
name: "PNet"
input: "data"
input_dim: 1
input_dim: 3
input_dim: 12
input_dim: 12
layer {
name: "conv1"
type: "Convolution"
bottom: "data"
top: "conv1"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 10
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "PReLU1"
type: "PReLU"
bottom: "conv1"
top: "conv1"
}
layer {
name: "pool1"
type: "Pooling"
bottom: "conv1"
top: "pool1"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "conv2"
type: "Convolution"
bottom: "pool1"
top: "conv2"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 16
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "PReLU2"
type: "PReLU"
bottom: "conv2"
top: "conv2"
}
layer {
name: "conv3"
type: "Convolution"
bottom: "conv2"
top: "conv3"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 32
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "PReLU3"
type: "PReLU"
bottom: "conv3"
top: "conv3"
}
layer {
name: "conv4-1"
type: "Convolution"
bottom: "conv3"
top: "conv4-1"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 2
kernel_size: 1
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv4-2"
type: "Convolution"
bottom: "conv3"
top: "conv4-2"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 4
kernel_size: 1
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "prob1"
type: "Softmax"
bottom: "conv4-1"
top: "prob1"
}
{
"nodes": [
{
"op": "null",
"param": {},
"name": "data",
"inputs": [],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv1_weight",
"inputs": [],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv1_bias",
"inputs": [],
"backward_source_id": -1
},
{
"op": "Convolution",
"param": {
"cudnn_off": "False",
"cudnn_tune": "off",
"dilate": "(1,1)",
"kernel": "(3,3)",
"no_bias": "False",
"num_filter": "28",
"num_group": "1",
"pad": "(0,0)",
"stride": "(1,1)",
"workspace": "1024"
},
"name": "conv1",
"inputs": [[0, 0], [1, 0], [2, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "prelu1_gamma",
"inputs": [],
"backward_source_id": -1
},
{
"op": "LeakyReLU",
"param": {
"act_type": "prelu",
"lower_bound": "0.125",
"slope": "0.25",
"upper_bound": "0.334"
},
"name": "prelu1",
"inputs": [[3, 0], [4, 0]],
"backward_source_id": -1
},
{
"op": "Pooling",
"param": {
"global_pool": "False",
"kernel": "(3,3)",
"pad": "(0,0)",
"pool_type": "max",
"pooling_convention": "full",
"stride": "(2,2)"
},
"name": "pool1",
"inputs": [[5, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv2_weight",
"inputs": [],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv2_bias",
"inputs": [],
"backward_source_id": -1
},
{
"op": "Convolution",
"param": {
"cudnn_off": "False",
"cudnn_tune": "off",
"dilate": "(1,1)",
"kernel": "(3,3)",
"no_bias": "False",
"num_filter": "48",
"num_group": "1",
"pad": "(0,0)",
"stride": "(1,1)",
"workspace": "1024"
},
"name": "conv2",
"inputs": [[6, 0], [7, 0], [8, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "prelu2_gamma",
"inputs": [],
"backward_source_id": -1
},
{
"op": "LeakyReLU",
"param": {
"act_type": "prelu",
"lower_bound": "0.125",
"slope": "0.25",
"upper_bound": "0.334"
},
"name": "prelu2",
"inputs": [[9, 0], [10, 0]],
"backward_source_id": -1
},
{
"op": "Pooling",
"param": {
"global_pool": "False",
"kernel": "(3,3)",
"pad": "(0,0)",
"pool_type": "max",
"pooling_convention": "full",
"stride": "(2,2)"
},
"name": "pool2",
"inputs": [[11, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv3_weight",
"inputs": [],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv3_bias",
"inputs": [],
"backward_source_id": -1
},
{
"op": "Convolution",
"param": {
"cudnn_off": "False",
"cudnn_tune": "off",
"dilate": "(1,1)",
"kernel": "(2,2)",
"no_bias": "False",
"num_filter": "64",
"num_group": "1",
"pad": "(0,0)",
"stride": "(1,1)",
"workspace": "1024"
},
"name": "conv3",
"inputs": [[12, 0], [13, 0], [14, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "prelu3_gamma",
"inputs": [],
"backward_source_id": -1
},
{
"op": "LeakyReLU",
"param": {
"act_type": "prelu",
"lower_bound": "0.125",
"slope": "0.25",
"upper_bound": "0.334"
},
"name": "prelu3",
"inputs": [[15, 0], [16, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv4_weight",
"inputs": [],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv4_bias",
"inputs": [],
"backward_source_id": -1
},
{
"op": "FullyConnected",
"param": {
"no_bias": "False",
"num_hidden": "128"
},
"name": "conv4",
"inputs": [[17, 0], [18, 0], [19, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "prelu4_gamma",
"inputs": [],
"backward_source_id": -1
},
{
"op": "LeakyReLU",
"param": {
"act_type": "prelu",
"lower_bound": "0.125",
"slope": "0.25",
"upper_bound": "0.334"
},
"name": "prelu4",
"inputs": [[20, 0], [21, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv5_2_weight",
"inputs": [],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv5_2_bias",
"inputs": [],
"backward_source_id": -1
},
{
"op": "FullyConnected",
"param": {
"no_bias": "False",
"num_hidden": "4"
},
"name": "conv5_2",
"inputs": [[22, 0], [23, 0], [24, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv5_1_weight",
"inputs": [],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv5_1_bias",
"inputs": [],
"backward_source_id": -1
},
{
"op": "FullyConnected",
"param": {
"no_bias": "False",
"num_hidden": "2"
},
"name": "conv5_1",
"inputs": [[22, 0], [26, 0], [27, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "prob1_label",
"inputs": [],
"backward_source_id": -1
},
{
"op": "SoftmaxOutput",
"param": {
"grad_scale": "1",
"ignore_label": "-1",
"multi_output": "False",
"normalization": "null",
"use_ignore": "False"
},
"name": "prob1",
"inputs": [[28, 0], [29, 0]],
"backward_source_id": -1
}
],
"arg_nodes": [
0,
1,
2,
4,
7,
8,
10,
13,
14,
16,
18,
19,
21,
23,
24,
26,
27,
29
],
"heads": [[25, 0], [30, 0]]
}
\ No newline at end of file
name: "RNet"
input: "data"
input_dim: 1
input_dim: 3
input_dim: 24
input_dim: 24
##########################
######################
layer {
name: "conv1"
type: "Convolution"
bottom: "data"
top: "conv1"
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
convolution_param {
num_output: 28
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "prelu1"
type: "PReLU"
bottom: "conv1"
top: "conv1"
propagate_down: true
}
layer {
name: "pool1"
type: "Pooling"
bottom: "conv1"
top: "pool1"
pooling_param {
pool: MAX
kernel_size: 3
stride: 2
}
}
layer {
name: "conv2"
type: "Convolution"
bottom: "pool1"
top: "conv2"
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
convolution_param {
num_output: 48
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "prelu2"
type: "PReLU"
bottom: "conv2"
top: "conv2"
propagate_down: true
}
layer {
name: "pool2"
type: "Pooling"
bottom: "conv2"
top: "pool2"
pooling_param {
pool: MAX
kernel_size: 3
stride: 2
}
}
####################################
##################################
layer {
name: "conv3"
type: "Convolution"
bottom: "pool2"
top: "conv3"
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
convolution_param {
num_output: 64
kernel_size: 2
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "prelu3"
type: "PReLU"
bottom: "conv3"
top: "conv3"
propagate_down: true
}
###############################
###############################
layer {
name: "conv4"
type: "InnerProduct"
bottom: "conv3"
top: "conv4"
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
inner_product_param {
num_output: 128
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "prelu4"
type: "PReLU"
bottom: "conv4"
top: "conv4"
}
# Fully connected head with 2 outputs; reads "conv4", writes "conv5-1",
# which the "prob1" Softmax layer below consumes.
# Presumably the face / non-face scores (MTCNN convention) -- TODO confirm
# against the detector code that reads this network's outputs.
layer {
name: "conv5-1"
type: "InnerProduct"
bottom: "conv4"
top: "conv5-1"
# Weights and bias (in that order) are frozen: lr_mult 0, decay_mult 0.
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
inner_product_param {
num_output: 2
# The two commented-out fields below are convolution settings that do not
# apply to InnerProduct; presumably leftovers from a convolutional variant.
#kernel_size: 1
#stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
# Fully connected head with 4 outputs; reads "conv4", writes "conv5-2".
# No later layer in this network consumes "conv5-2", so it is a raw output.
# Presumably the bounding-box regression offsets (MTCNN convention) -- TODO confirm.
layer {
name: "conv5-2"
type: "InnerProduct"
bottom: "conv4"
top: "conv5-2"
# NOTE(review): every other learnable layer in this network uses lr_mult 0 /
# decay_mult 0, but this head keeps lr_mult 1 (weights) and 2 (bias) with
# decay_mult 1, so it would still be trained if this file were used with a
# solver. Irrelevant for inference; confirm whether the difference is intended.
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 1
}
inner_product_param {
num_output: 4
# The two commented-out fields below are convolution settings that do not
# apply to InnerProduct; presumably leftovers from a convolutional variant.
#kernel_size: 1
#stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
# Softmax over the 2-way "conv5-1" head; writes the probabilities to "prob1".
layer {
name: "prob1"
type: "Softmax"
bottom: "conv5-1"
top: "prob1"
}
\ No newline at end of file
{
"nodes": [
{
"op": "null",
"param": {},
"name": "data",
"inputs": [],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv1_weight",
"inputs": [],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv1_bias",
"inputs": [],
"backward_source_id": -1
},
{
"op": "Convolution",
"param": {
"cudnn_off": "False",
"cudnn_tune": "off",
"dilate": "(1,1)",
"kernel": "(3,3)",
"no_bias": "False",
"num_filter": "32",
"num_group": "1",
"pad": "(0,0)",
"stride": "(1,1)",
"workspace": "1024"
},
"name": "conv1",
"inputs": [[0, 0], [1, 0], [2, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "prelu1_gamma",
"inputs": [],
"backward_source_id": -1
},
{
"op": "LeakyReLU",
"param": {
"act_type": "prelu",
"lower_bound": "0.125",
"slope": "0.25",
"upper_bound": "0.334"
},
"name": "prelu1",
"inputs": [[3, 0], [4, 0]],
"backward_source_id": -1
},
{
"op": "Pooling",
"param": {
"global_pool": "False",
"kernel": "(3,3)",
"pad": "(0,0)",
"pool_type": "max",
"pooling_convention": "full",
"stride": "(2,2)"
},
"name": "pool1",
"inputs": [[5, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv2_weight",
"inputs": [],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv2_bias",
"inputs": [],
"backward_source_id": -1
},
{
"op": "Convolution",
"param": {
"cudnn_off": "False",
"cudnn_tune": "off",
"dilate": "(1,1)",
"kernel": "(3,3)",
"no_bias": "False",
"num_filter": "64",
"num_group": "1",
"pad": "(0,0)",
"stride": "(1,1)",
"workspace": "1024"
},
"name": "conv2",
"inputs": [[6, 0], [7, 0], [8, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "prelu2_gamma",
"inputs": [],
"backward_source_id": -1
},
{
"op": "LeakyReLU",
"param": {
"act_type": "prelu",
"lower_bound": "0.125",
"slope": "0.25",
"upper_bound": "0.334"
},
"name": "prelu2",
"inputs": [[9, 0], [10, 0]],
"backward_source_id": -1
},
{
"op": "Pooling",
"param": {
"global_pool": "False",
"kernel": "(3,3)",
"pad": "(0,0)",
"pool_type": "max",
"pooling_convention": "full",
"stride": "(2,2)"
},
"name": "pool2",
"inputs": [[11, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv3_weight",
"inputs": [],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv3_bias",
"inputs": [],
"backward_source_id": -1
},
{
"op": "Convolution",
"param": {
"cudnn_off": "False",
"cudnn_tune": "off",
"dilate": "(1,1)",
"kernel": "(3,3)",
"no_bias": "False",
"num_filter": "64",
"num_group": "1",
"pad": "(0,0)",
"stride": "(1,1)",
"workspace": "1024"
},
"name": "conv3",
"inputs": [[12, 0], [13, 0], [14, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "prelu3_gamma",
"inputs": [],
"backward_source_id": -1
},
{
"op": "LeakyReLU",
"param": {
"act_type": "prelu",
"lower_bound": "0.125",
"slope": "0.25",
"upper_bound": "0.334"
},
"name": "prelu3",
"inputs": [[15, 0], [16, 0]],
"backward_source_id": -1
},
{
"op": "Pooling",
"param": {
"global_pool": "False",
"kernel": "(2,2)",
"pad": "(0,0)",
"pool_type": "max",
"pooling_convention": "full",
"stride": "(2,2)"
},
"name": "pool3",
"inputs": [[17, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv4_weight",
"inputs": [],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv4_bias",
"inputs": [],
"backward_source_id": -1
},
{
"op": "Convolution",
"param": {
"cudnn_off": "False",
"cudnn_tune": "off",
"dilate": "(1,1)",
"kernel": "(2,2)",
"no_bias": "False",
"num_filter": "128",
"num_group": "1",
"pad": "(0,0)",
"stride": "(1,1)",
"workspace": "1024"
},
"name": "conv4",
"inputs": [[18, 0], [19, 0], [20, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "prelu4_gamma",
"inputs": [],
"backward_source_id": -1
},
{
"op": "LeakyReLU",
"param": {
"act_type": "prelu",
"lower_bound": "0.125",
"slope": "0.25",
"upper_bound": "0.334"
},
"name": "prelu4",
"inputs": [[21, 0], [22, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv5_weight",
"inputs": [],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv5_bias",
"inputs": [],
"backward_source_id": -1
},
{
"op": "FullyConnected",
"param": {
"no_bias": "False",
"num_hidden": "256"
},
"name": "conv5",
"inputs": [[23, 0], [24, 0], [25, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "prelu5_gamma",
"inputs": [],
"backward_source_id": -1
},
{
"op": "LeakyReLU",
"param": {
"act_type": "prelu",
"lower_bound": "0.125",
"slope": "0.25",
"upper_bound": "0.334"
},
"name": "prelu5",
"inputs": [[26, 0], [27, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv6_3_weight",
"inputs": [],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv6_3_bias",
"inputs": [],
"backward_source_id": -1
},
{
"op": "FullyConnected",
"param": {
"no_bias": "False",
"num_hidden": "10"
},
"name": "conv6_3",
"inputs": [[28, 0], [29, 0], [30, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv6_2_weight",
"inputs": [],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv6_2_bias",
"inputs": [],
"backward_source_id": -1
},
{
"op": "FullyConnected",
"param": {
"no_bias": "False",
"num_hidden": "4"
},
"name": "conv6_2",
"inputs": [[28, 0], [32, 0], [33, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv6_1_weight",
"inputs": [],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "conv6_1_bias",
"inputs": [],
"backward_source_id": -1
},
{
"op": "FullyConnected",
"param": {
"no_bias": "False",
"num_hidden": "2"
},
"name": "conv6_1",
"inputs": [[28, 0], [35, 0], [36, 0]],
"backward_source_id": -1
},
{
"op": "null",
"param": {},
"name": "prob1_label",
"inputs": [],
"backward_source_id": -1
},
{
"op": "SoftmaxOutput",
"param": {
"grad_scale": "1",
"ignore_label": "-1",
"multi_output": "False",
"normalization": "null",
"use_ignore": "False"
},
"name": "prob1",
"inputs": [[37, 0], [38, 0]],
"backward_source_id": -1
}
],
"arg_nodes": [
0,
1,
2,
4,
7,
8,
10,
13,
14,
16,
19,
20,
22,
24,
25,
27,
29,
30,
32,
33,
35,
36,
38
],
"heads": [[31, 0], [34, 0], [39, 0]]
}
\ No newline at end of file
# Caffe network definition named "ONet".
# Declares a single input blob "data" with Caffe's legacy input/input_dim
# fields; the four input_dim values are read in N, C, H, W order, i.e.
# batch size 1, 3 channels, 48x48 pixels.
name: "ONet"
input: "data"
input_dim: 1
input_dim: 3
input_dim: 48
input_dim: 48
##################################
# 3x3 convolution, stride 1, 32 output channels; reads "data", writes "conv1".
layer {
name: "conv1"
type: "Convolution"
bottom: "data"
top: "conv1"
# Caffe matches param blocks to the layer's learnable blobs in order
# (weights, then bias): weights train at 1x the base learning rate, bias at
# 2x, both with a weight-decay multiplier of 1. Only relevant when training.
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 1
}
convolution_param {
num_output: 32
kernel_size: 3
stride: 1
# Fillers only describe initialization when no pretrained weights are loaded.
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
# PReLU activation computed in place: bottom and top are both "conv1".
layer {
name: "prelu1"
type: "PReLU"
bottom: "conv1"
top: "conv1"
}
# 3x3 max pooling with stride 2; reads "conv1", writes "pool1".
layer {
name: "pool1"
type: "Pooling"
bottom: "conv1"
top: "pool1"
pooling_param {
pool: MAX
kernel_size: 3
stride: 2
}
}
# 3x3 convolution, stride 1, 64 output channels; reads "pool1", writes "conv2".
layer {
name: "conv2"
type: "Convolution"
bottom: "pool1"
top: "conv2"
# Training multipliers for weights (1x lr) and bias (2x lr), decay_mult 1.
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 1
}
convolution_param {
num_output: 64
kernel_size: 3
stride: 1
# Fillers only describe initialization when no pretrained weights are loaded.
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
# PReLU activation computed in place: bottom and top are both "conv2".
layer {
name: "prelu2"
type: "PReLU"
bottom: "conv2"
top: "conv2"
}
# 3x3 max pooling with stride 2; reads "conv2", writes "pool2".
layer {
name: "pool2"
type: "Pooling"
bottom: "conv2"
top: "pool2"
pooling_param {
pool: MAX
kernel_size: 3
stride: 2
}
}
# 3x3 convolution, 64 output channels; reads "pool2", writes "conv3".
# stride is not set here, so Caffe's default stride of 1 applies.
layer {
name: "conv3"
type: "Convolution"
bottom: "pool2"
top: "conv3"
# Training multipliers for weights (1x lr) and bias (2x lr), decay_mult 1.
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 1
}
convolution_param {
num_output: 64
kernel_size: 3
# Fillers only describe initialization when no pretrained weights are loaded.
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
# PReLU activation computed in place: bottom and top are both "conv3".
layer {
name: "prelu3"
type: "PReLU"
bottom: "conv3"
top: "conv3"
}
# 2x2 max pooling with stride 2; reads "conv3", writes "pool3".
layer {
name: "pool3"
type: "Pooling"
bottom: "conv3"
top: "pool3"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
# 2x2 convolution, 128 output channels; reads "pool3", writes "conv4".
# stride is not set here, so Caffe's default stride of 1 applies.
layer {
name: "conv4"
type: "Convolution"
bottom: "pool3"
top: "conv4"
# Training multipliers for weights (1x lr) and bias (2x lr), decay_mult 1.
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 1
}
convolution_param {
num_output: 128
kernel_size: 2
# Fillers only describe initialization when no pretrained weights are loaded.
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
# PReLU activation computed in place: bottom and top are both "conv4".
layer {
name: "prelu4"
type: "PReLU"
bottom: "conv4"
top: "conv4"
}
# Despite the "conv" name this is a fully connected (InnerProduct) layer:
# it maps "conv4" to a 256-element output blob "conv5".
layer {
name: "conv5"
type: "InnerProduct"
bottom: "conv4"
top: "conv5"
# Training multipliers for weights (1x lr) and bias (2x lr), decay_mult 1.
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 1
}
inner_product_param {
# Commented-out convolution setting; it does not apply to InnerProduct and
# is presumably a leftover from a convolutional variant of this layer.
#kernel_size: 3
num_output: 256
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
# Dropout with ratio 0.25 applied in place on "conv5", before the PReLU.
# Note: the MXNet symbol JSON earlier in this file has no matching node
# (its "prelu5" reads "conv5" directly). Caffe's Dropout passes data through
# unchanged in the TEST phase, so this should not affect inference --
# confirm if the two definitions are expected to match for training.
layer {
name: "drop5"
type: "Dropout"
bottom: "conv5"
top: "conv5"
dropout_param {
dropout_ratio: 0.25
}
}
# PReLU activation computed in place: bottom and top are both "conv5".
layer {
name: "prelu5"
type: "PReLU"
bottom: "conv5"
top: "conv5"
}
# Fully connected head with 2 outputs; reads "conv5", writes "conv6-1",
# which the "prob1" Softmax layer at the end of this network consumes.
# Presumably the face / non-face scores (MTCNN convention) -- TODO confirm.
layer {
name: "conv6-1"
type: "InnerProduct"
bottom: "conv5"
top: "conv6-1"
# Training multipliers for weights (1x lr) and bias (2x lr), decay_mult 1.
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 1
}
inner_product_param {
# Commented-out convolution setting; it does not apply to InnerProduct.
#kernel_size: 1
num_output: 2
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
# Fully connected head with 4 outputs; reads "conv5", writes "conv6-2".
# No later layer in this network consumes "conv6-2", so it is a raw output.
# Presumably the bounding-box regression offsets (MTCNN convention) -- TODO confirm.
layer {
name: "conv6-2"
type: "InnerProduct"
bottom: "conv5"
top: "conv6-2"
# Training multipliers for weights (1x lr) and bias (2x lr), decay_mult 1.
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 1
}
inner_product_param {
# Commented-out convolution setting; it does not apply to InnerProduct.
#kernel_size: 1
num_output: 4
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
# Fully connected head with 10 outputs; reads "conv5", writes "conv6-3".
# No later layer in this network consumes "conv6-3", so it is a raw output.
# Presumably five (x, y) facial-landmark coordinates (MTCNN convention);
# the coordinate ordering is not visible here -- TODO confirm.
layer {
name: "conv6-3"
type: "InnerProduct"
bottom: "conv5"
top: "conv6-3"
# Training multipliers for weights (1x lr) and bias (2x lr), decay_mult 1.
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 1
}
inner_product_param {
# Commented-out convolution setting; it does not apply to InnerProduct.
#kernel_size: 1
num_output: 10
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
# Softmax over the 2-way "conv6-1" head; writes the probabilities to "prob1".
layer {
name: "prob1"
type: "Softmax"
bottom: "conv6-1"
top: "prob1"
}
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment