Commit 33296efd by vincent

init mtcnn + stage1 until bounding box extraction

parent ae742d9a
...@@ -17,6 +17,7 @@ export * from './faceDetectionNet'; ...@@ -17,6 +17,7 @@ export * from './faceDetectionNet';
export * from './faceLandmarkNet'; export * from './faceLandmarkNet';
export * from './faceRecognitionNet'; export * from './faceRecognitionNet';
export * from './globalApi'; export * from './globalApi';
export * from './mtcnn';
export * from './padToSquare'; export * from './padToSquare';
export * from './toNetInput'; export * from './toNetInput';
export * from './utils' export * from './utils'
\ No newline at end of file
...@@ -4,7 +4,9 @@ import { NeuralNetwork } from '../commons/NeuralNetwork'; ...@@ -4,7 +4,9 @@ import { NeuralNetwork } from '../commons/NeuralNetwork';
import { NetInput } from '../NetInput'; import { NetInput } from '../NetInput';
import { toNetInput } from '../toNetInput'; import { toNetInput } from '../toNetInput';
import { TNetInput } from '../types'; import { TNetInput } from '../types';
import { bgrToRgbTensor } from './bgrToRgbTensor';
import { extractParams } from './extractParams'; import { extractParams } from './extractParams';
import { pyramidDown } from './pyramidDown';
import { stage1 } from './stage1'; import { stage1 } from './stage1';
import { NetParams } from './types'; import { NetParams } from './types';
...@@ -14,7 +16,12 @@ export class Mtcnn extends NeuralNetwork<NetParams> { ...@@ -14,7 +16,12 @@ export class Mtcnn extends NeuralNetwork<NetParams> {
super('Mtcnn') super('Mtcnn')
} }
public forwardInput(input: NetInput, minFaceSize: number = 20, scaleFactor: number = 0.709): tf.Tensor2D { public forwardInput(
input: NetInput,
minFaceSize: number = 20,
scaleFactor: number = 0.709,
scoreThresholds: number[] = [0.6, 0.7, 0.7]
): tf.Tensor2D {
const { params } = this const { params } = this
...@@ -23,28 +30,14 @@ export class Mtcnn extends NeuralNetwork<NetParams> { ...@@ -23,28 +30,14 @@ export class Mtcnn extends NeuralNetwork<NetParams> {
} }
return tf.tidy(() => { return tf.tidy(() => {
const imgTensor = tf.expandDims(input.inputs[0]).toFloat() as tf.Tensor4D // TODO: expects bgr input?
let imgTensor = bgrToRgbTensor(
tf.expandDims(input.inputs[0]).toFloat() as tf.Tensor4D
)
function pyramidDown(minFaceSize: number, scaleFactor: number, dims: number[]): number[] { const scales = pyramidDown(minFaceSize, scaleFactor, imgTensor.shape.slice(1))
const [height, width] = dims const out1 = stage1(imgTensor, scales, scoreThresholds[0], params.pnet)
const m = 12 / minFaceSize
const scales = []
let minLayer = Math.min(height, width) * m
let exp = 0
while (minLayer >= 12) {
scales.push(m * Math.pow(scaleFactor, exp))
minLayer = minLayer * scaleFactor
exp += 1
}
return scales
}
const scales = pyramidDown(minFaceSize, scaleFactor, imgTensor.shape)
const out1 = stage1(imgTensor, scales, params.pnet)
return tf.tensor2d([0], [1, 1]) return tf.tensor2d([0], [1, 1])
}) })
......
...@@ -4,16 +4,15 @@ import { convLayer } from '../commons/convLayer'; ...@@ -4,16 +4,15 @@ import { convLayer } from '../commons/convLayer';
import { sharedLayer } from './sharedLayers'; import { sharedLayer } from './sharedLayers';
import { PNetParams } from './types'; import { PNetParams } from './types';
export function PNet(x: tf.Tensor4D, params: PNetParams): { prob: tf.Tensor3D, convOut: tf.Tensor4D } { export function PNet(x: tf.Tensor4D, params: PNetParams): { prob: tf.Tensor4D, regions: tf.Tensor4D } {
return tf.tidy(() => { return tf.tidy(() => {
let out = sharedLayer(x, params) let out = sharedLayer(x, params, true)
const conv = convLayer(out, params.conv4_1, 'valid') const conv = convLayer(out, params.conv4_1, 'valid')
// TODO: tf.reduce_max <=> tf.max ? const max = tf.expandDims(tf.max(conv, 3), 3)
const logits = tf.sub(conv, tf.max(conv, 3)) const prob = tf.softmax(tf.sub(conv, max), 3) as tf.Tensor4D
const prob = tf.softmax(logits, 3) as tf.Tensor3D const regions = convLayer(out, params.conv4_2, 'valid')
const convOut = convLayer(out, params.conv4_2, 'valid')
return { prob, convOut } return { prob, regions }
}) })
} }
\ No newline at end of file
import * as tf from '@tensorflow/tfjs-core';
/**
 * Reverses the order of the slices along axis 3 of a rank-4 tensor, which
 * turns a BGR channel layout into RGB (and vice versa, since the swap is
 * symmetric).
 *
 * Assumes axis 3 is the color-channel axis, i.e. a channels-last image
 * batch — TODO confirm against callers.
 *
 * @param tensor The image batch whose channel order should be flipped.
 * @returns A new tensor of the same shape with the axis-3 slices reversed.
 */
export function bgrToRgbTensor(tensor: tf.Tensor4D): tf.Tensor4D {
  return tf.tidy(
    // Unstacking and restacking alone reproduces the input unchanged; the
    // slices have to be reversed in between for the channels to swap.
    () => tf.stack(tf.unstack(tensor, 3).reverse(), 3)
  ) as tf.Tensor4D
}
\ No newline at end of file
// Factor by which stage1 multiplies a score-map cell index when mapping the
// cell back to input-image coordinates (before dividing by the scale).
export const CELL_STRIDE = 2
// Offset added to the strided cell index to obtain the end coordinate of a
// cell's window; pyramidDown also divides it by minFaceSize to get the base
// scale. Presumably the edge length of the PNet input window — TODO confirm.
export const CELL_SIZE = 12
\ No newline at end of file
...@@ -40,7 +40,7 @@ function extractorsFactory(extractWeights: ExtractWeightsFunction, paramMappings ...@@ -40,7 +40,7 @@ function extractorsFactory(extractWeights: ExtractWeightsFunction, paramMappings
function extractRNetParams(): RNetParams { function extractRNetParams(): RNetParams {
const sharedParams = extractSharedParams([3, 28, 48, 64], 'rnet') const sharedParams = extractSharedParams([3, 28, 48, 64], 'rnet', true)
const fc1 = extractFCParams(576, 128, 'rnet/fc1') const fc1 = extractFCParams(576, 128, 'rnet/fc1')
const prelu4_alpha = extractPReluParams(128, 'rnet/prelu4_alpha') const prelu4_alpha = extractPReluParams(128, 'rnet/prelu4_alpha')
const fc2_1 = extractFCParams(128, 2, 'rnet/fc2_1') const fc2_1 = extractFCParams(128, 2, 'rnet/fc2_1')
...@@ -90,5 +90,9 @@ export function extractParams(weights: Float32Array): { params: NetParams, param ...@@ -90,5 +90,9 @@ export function extractParams(weights: Float32Array): { params: NetParams, param
const rnet = extractRNetParams() const rnet = extractRNetParams()
const onet = extractONetParams() const onet = extractONetParams()
if (getRemainingWeights().length !== 0) {
throw new Error(`weights remaing after extract: ${getRemainingWeights().length}`)
}
return { params: { pnet, rnet, onet }, paramMappings } return { params: { pnet, rnet, onet }, paramMappings }
} }
\ No newline at end of file
import { CELL_SIZE } from './config';
/**
 * Computes the scale factors of the image pyramid.
 *
 * The base scale is CELL_SIZE / minFaceSize. Starting from the smaller image
 * side multiplied by that base scale, one pyramid level is emitted per
 * iteration and the side is shrunk by scaleFactor, until it drops below
 * CELL_SIZE.
 *
 * @param minFaceSize Divisor of CELL_SIZE used to derive the base scale.
 * @param scaleFactor Multiplier applied between consecutive pyramid levels.
 * @param dims Image dimensions; the first two entries are read as height and width.
 * @returns The scale of every pyramid level, largest first. Empty if even the
 * base scale shrinks the smaller image side below CELL_SIZE.
 * @throws Error if the given parameters would never let the pyramid shrink
 * below CELL_SIZE (scaleFactor >= 1, or a non-finite base scale such as the
 * one produced by minFaceSize = 0).
 */
export function pyramidDown(minFaceSize: number, scaleFactor: number, dims: number[]): number[] {
  const [height, width] = dims
  const m = CELL_SIZE / minFaceSize

  let minLayer = Math.min(height, width) * m

  // Without this guard the loop below would never terminate and would grow
  // the result array until memory is exhausted.
  if (minLayer >= CELL_SIZE && (scaleFactor >= 1 || !Number.isFinite(minLayer))) {
    throw new Error(
      `pyramidDown - pyramid does not converge for minFaceSize: ${minFaceSize}, scaleFactor: ${scaleFactor}`
    )
  }

  const scales: number[] = []
  let exp = 0
  // Use CELL_SIZE rather than a literal 12 so the bound stays in sync with m.
  while (minLayer >= CELL_SIZE) {
    scales.push(m * Math.pow(scaleFactor, exp))
    minLayer = minLayer * scaleFactor
    exp += 1
  }
  return scales
}
\ No newline at end of file
import * as tf from '@tensorflow/tfjs-core'; import * as tf from '@tensorflow/tfjs-core';
import { Point } from '../Point';
import { CELL_SIZE, CELL_STRIDE } from './config';
import { PNet } from './PNet'; import { PNet } from './PNet';
import { PNetParams } from './types'; import { PNetParams } from './types';
function rescaleAndNormalize(x: tf.Tensor4D, scale: number): tf.Tensor4D { function rescaleAndNormalize(x: tf.Tensor4D, scale: number): tf.Tensor4D {
return tf.tidy(() => { return tf.tidy(() => {
const [height, width] = x.shape
const resized = tf.image.resizeBilinear(x, [height * scale, width * scale])
return tf.mul(tf.sub(resized, tf.scalar(127.5)), tf.scalar(0.0078125)) const [height, width] = x.shape.slice(1)
const resized = tf.image.resizeBilinear(x, [Math.floor(height * scale), Math.floor(width * scale)])
// TODO: ? const normalized = tf.mul(tf.sub(resized, tf.scalar(127.5)), tf.scalar(0.0078125))
// img_x = np.expand_dims(scaled_image, 0)
// img_y = np.transpose(img_x, (0, 2, 1, 3))
return (tf.transpose(normalized, [0, 2, 1, 3]) as tf.Tensor4D)
}) })
} }
export function stage1(x: tf.Tensor4D, scales: number[], params: PNetParams) {
return tf.tidy(() => {
const boxes = scales.map((scale) => { function extractBoundingBoxes(
const resized = rescaleAndNormalize(x, scale) scores: tf.Tensor2D,
const { prob, convOut } = PNet(resized, params) regions: tf.Tensor3D,
}) scale: number,
scoreThreshold: number
) {
}) // TODO: fix this!, maybe better to use tf.gather here
} const indices2D: Point[] = []
for (let y = 0; y < scores.shape[0]; y++) {
for (let x = 0; x < scores.shape[1]; x++) {
if (scores.get(y, x) >= scoreThreshold) {
indices2D.push(new Point(x, y))
}
}
}
/* if (!indices2D.length) {
return null
}
for scale in scales: return tf.tidy(() => {
scaled_image = self.__scale_image(image, scale)
img_x = np.expand_dims(scaled_image, 0) const indicesTensor = tf.tensor2d(
img_y = np.transpose(img_x, (0, 2, 1, 3)) indices2D.map(pt => [pt.y, pt.x]),
[indices2D.length, 2]
)
out = self.__pnet.feed(img_y) const cellsStart = tf.round(
indicesTensor.mul(tf.scalar(CELL_STRIDE)).add(tf.scalar(1)).div(tf.scalar(scale))
) as tf.Tensor2D
const cellsEnd = tf.round(
indicesTensor.mul(tf.scalar(CELL_STRIDE)).add(tf.scalar(CELL_SIZE)).div(tf.scalar(scale))
) as tf.Tensor2D
out0 = np.transpose(out[0], (0, 2, 1, 3)) const scoresTensor = tf.tensor1d(indices2D.map(pt => scores.get(pt.y, pt.x)))
out1 = np.transpose(out[1], (0, 2, 1, 3))
boxes, _ = self.__generate_bounding_box(out1[0, :, :, 1].copy(), const candidateRegions = indices2D.map(c => ({
out0[0, :, :, :].copy(), scale, self.__steps_threshold[0]) left: regions.get(c.y, c.x, 0),
top: regions.get(c.y, c.x, 1),
right: regions.get(c.y, c.x, 2),
bottom: regions.get(c.y, c.x, 3)
}))
# inter-scale nms const regionsTensor = tf.tensor2d(
pick = self.__nms(boxes.copy(), 0.5, 'Union') candidateRegions.map(r => [r.left, r.top, r.right, r.bottom]),
if boxes.size > 0 and pick.size > 0: [candidateRegions.length, 4]
boxes = boxes[pick, :] )
total_boxes = np.append(total_boxes, boxes, axis=0)
const boxesTensor = tf.concat2d([cellsStart, cellsEnd, scoresTensor.as2D(scoresTensor.size, 1), regionsTensor], 1)
return boxesTensor
})
}
// TODO: debug
declare const window: any
numboxes = total_boxes.shape[0] export function stage1(x: tf.Tensor4D, scales: number[], scoreThreshold: number, params: PNetParams) {
return tf.tidy(() => {
const boxes = scales.map((scale, i) => {
let resized = i === 0
// TODO: debug
? tf.tensor4d(window.resizedData, [1, 820, 461, 3])
if numboxes > 0: : rescaleAndNormalize(x, scale)
pick = self.__nms(total_boxes.copy(), 0.7, 'Union')
total_boxes = total_boxes[pick, :]
regw = total_boxes[:, 2] - total_boxes[:, 0] const { prob, regions } = PNet(resized, params)
regh = total_boxes[:, 3] - total_boxes[:, 1]
qq1 = total_boxes[:, 0] + total_boxes[:, 5] * regw const scores = tf.unstack(prob, 3)[1]
qq2 = total_boxes[:, 1] + total_boxes[:, 6] * regh const [sh, sw] = scores.shape.slice(1)
qq3 = total_boxes[:, 2] + total_boxes[:, 7] * regw const [rh, rw] = regions.shape.slice(1)
qq4 = total_boxes[:, 3] + total_boxes[:, 8] * regh
total_boxes = np.transpose(np.vstack([qq1, qq2, qq3, qq4, total_boxes[:, 4]]))
total_boxes = self.__rerec(total_boxes.copy())
total_boxes[:, 0:4] = np.fix(total_boxes[:, 0:4]).astype(np.int32) const boxes = extractBoundingBoxes(
status = StageStatus(self.__pad(total_boxes.copy(), stage_status.width, stage_status.height), scores.as2D(sh, sw),
width=stage_status.width, height=stage_status.height) regions.as3D(rh, rw, 4),
scale,
scoreThreshold
)
return total_boxes, status // TODO: debug
*/ if (!boxes) {
\ No newline at end of file console.log('no boxes for scale', scale)
return
}
// TODO: debug
i === 0 && (window.boxes = boxes.dataSync())
})
})
}
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment