Commit 5e957ec7 by vincent

implemented stage2

parent 084cd30a
......@@ -8,6 +8,7 @@ import { createCanvasFromMedia } from './utils';
export class NetInput {
private _inputs: tf.Tensor3D[] = []
private _canvases: HTMLCanvasElement[] = []
private _isManaged: boolean = false
private _isBatchInput: boolean = false
......@@ -16,14 +17,15 @@ export class NetInput {
constructor(
inputs: tf.Tensor4D | Array<TResolvedNetInput>,
isBatchInput: boolean = false
isBatchInput: boolean = false,
keepCanvases: boolean = false
) {
if (isTensor4D(inputs)) {
this._inputs = tf.unstack(inputs as tf.Tensor4D) as tf.Tensor3D[]
}
if (Array.isArray(inputs)) {
this._inputs = inputs.map(input => {
this._inputs = inputs.map((input, idx) => {
if (isTensor3D(input)) {
// TODO: make sure not to dispose original tensors passed in by the user
return tf.clone(input as tf.Tensor3D)
......@@ -39,9 +41,11 @@ export class NetInput {
return (input as tf.Tensor4D).reshape(shape.slice(1) as [number, number, number]) as tf.Tensor3D
}
return tf.fromPixels(
input instanceof HTMLCanvasElement ? input : createCanvasFromMedia(input as HTMLImageElement | HTMLVideoElement)
)
const canvas = input instanceof HTMLCanvasElement ? input : createCanvasFromMedia(input as HTMLImageElement | HTMLVideoElement)
if (keepCanvases) {
this._canvases[idx] = canvas
}
return tf.fromPixels(canvas)
})
}
......@@ -53,6 +57,10 @@ export class NetInput {
return this._inputs
}
/**
 * The canvases recorded for this NetInput.
 *
 * The constructor only writes an entry (at the index of the corresponding input)
 * when `keepCanvases` is true and the input is a canvas or a media element that
 * was converted to a canvas; tensor inputs never produce an entry, so the array
 * may be empty or sparse.
 */
public get canvases(): HTMLCanvasElement[] {
return this._canvases
}
/**
 * Read-only view of the private `_isManaged` flag, which is initialised to false.
 * NOTE(review): where the flag is set is not visible in this part of the file.
 */
public get isManaged(): boolean {
return this._isManaged
}
......
......@@ -55,4 +55,38 @@ export class BoundingBox {
Math.round(this.bottom)
)
}
/**
 * Clamps this box to the image bounds and reports how the clamped region maps
 * into an unclamped patch of size `w` x `h` (box width/height + 1).
 *
 * Coordinates are treated as 1-based: the left/top edges are clamped to 1 and
 * the right/bottom edges to `imageWidth` / `imageHeight`.
 *
 * @param imageHeight Height of the image the box lives in.
 * @param imageWidth Width of the image the box lives in.
 * @returns
 *  - `x`, `y`, `ex`, `ey`: the box edges after clamping to the image,
 *  - `dx`, `dy`: start offsets inside the patch (1 unless the box overflows the left/top edge),
 *  - `edx`, `edy`: end offsets inside the patch (`w`/`h` unless the box overflows the right/bottom edge),
 *  - `w`, `h`: size of the unclamped patch.
 */
public padAtBorders(imageHeight: number, imageWidth: number) {
  const w = this.width + 1
  const h = this.height + 1

  let dx = 1
  let dy = 1
  let edx = w
  let edy = h

  let x = this.left
  let y = this.top
  let ex = this.right
  let ey = this.bottom

  // box sticks out past the right edge: shorten the patch end by the overflow
  if (ex > imageWidth) {
    edx = -ex + imageWidth + w
    ex = imageWidth
  }
  // box sticks out past the bottom edge
  if (ey > imageHeight) {
    edy = -ey + imageHeight + h
    ey = imageHeight
  }
  // box sticks out past the left edge: shift the patch start by the overflow
  // (this must update dx, not edy, otherwise the bottom-edge result is clobbered)
  if (x < 1) {
    dx = 2 - x
    x = 1
  }
  // box sticks out past the top edge
  if (y < 1) {
    dy = 2 - y
    y = 1
  }

  return { dy, edy, dx, edx, y, ey, x, ex, w, h }
}
}
\ No newline at end of file
......@@ -8,6 +8,7 @@ import { bgrToRgbTensor } from './bgrToRgbTensor';
import { extractParams } from './extractParams';
import { pyramidDown } from './pyramidDown';
import { stage1 } from './stage1';
import { stage2 } from './stage2';
import { NetParams } from './types';
export class Mtcnn extends NeuralNetwork<NetParams> {
......@@ -16,12 +17,12 @@ export class Mtcnn extends NeuralNetwork<NetParams> {
super('Mtcnn')
}
public forwardInput(
public async forwardInput(
input: NetInput,
minFaceSize: number = 20,
scaleFactor: number = 0.709,
scoreThresholds: number[] = [0.6, 0.7, 0.7]
): tf.Tensor2D {
): Promise<tf.Tensor2D> {
const { params } = this
......@@ -29,18 +30,32 @@ export class Mtcnn extends NeuralNetwork<NetParams> {
throw new Error('Mtcnn - load model before inference')
}
return tf.tidy(() => {
// TODO: expects bgr input?
let imgTensor = bgrToRgbTensor(
tf.expandDims(input.inputs[0]).toFloat() as tf.Tensor4D
const inputTensor = input.inputs[0]
const inputCanvas = input.canvases[0]
if (!inputCanvas) {
throw new Error('Mtcnn - inputCanvas is not defined, note that passing tensors into Mtcnn.forwardInput is not supported yet.')
}
const imgTensor = tf.tidy(() =>
bgrToRgbTensor(
tf.expandDims(inputTensor).toFloat() as tf.Tensor4D
)
)
const scales = pyramidDown(minFaceSize, scaleFactor, imgTensor.shape.slice(1))
const out1 = await stage1(imgTensor, scales, scoreThresholds[0], params.pnet)
// using the inputCanvas to extract and resize the image patches, since it is faster
// than doing this on the gpu
const out2 = await stage2(inputCanvas, out1, scoreThresholds[1], params.rnet)
const out1 = stage1(imgTensor, scales, scoreThresholds[0], params.pnet)
imgTensor.dispose()
input.dispose()
return tf.tensor2d([0], [1, 1])
})
}
public async forward(
......@@ -50,7 +65,7 @@ export class Mtcnn extends NeuralNetwork<NetParams> {
scoreThresholds: number[] = [0.6, 0.7, 0.7]
): Promise<tf.Tensor2D> {
return this.forwardInput(
await toNetInput(input, true),
await toNetInput(input, true, true),
minFaceSize,
scaleFactor,
scoreThresholds
......
import * as tf from '@tensorflow/tfjs-core';
import { fullyConnectedLayer } from '../faceLandmarkNet/fullyConnectedLayer';
import { prelu } from './prelu';
import { sharedLayer } from './sharedLayers';
import { RNetParams } from './types';
/**
 * Runs the RNet head on top of the shared convolutional layers.
 *
 * The convolutional output is flattened per batch entry, passed through `fc1`
 * followed by a PReLU, and then fed into two separate fully connected layers:
 * `fc2_1` (turned into probabilities via a softmax over axis 1) and `fc2_2`
 * (returned as-is as `regions`).
 *
 * @param x Batch of input images.
 * @param params RNet weights.
 * @returns `prob` (softmax of the `fc2_1` output) and `regions` (the `fc2_2` output).
 */
export function RNet(x: tf.Tensor4D, params: RNetParams): { prob: tf.Tensor2D, regions: tf.Tensor2D } {
  return tf.tidy(() => {
    const features = sharedLayer(x, params)

    // flatten everything but the batch dimension so it matches the fc1 weights
    const batchSize = features.shape[0]
    const flatSize = params.fc1.weights.shape[0]
    const flattened = tf.reshape(features, [batchSize, flatSize]) as tf.Tensor2D

    const hidden = prelu<tf.Tensor2D>(
      fullyConnectedLayer(flattened, params.fc1),
      params.prelu4_alpha
    )

    // subtract the per-row maximum before the softmax
    const logits = fullyConnectedLayer(hidden, params.fc2_1)
    const rowMax = tf.expandDims(tf.max(logits, 1), 1)
    const prob = tf.softmax(tf.sub(logits, rowMax), 1) as tf.Tensor2D

    const regions = fullyConnectedLayer(hidden, params.fc2_2)

    return { prob, regions }
  })
}
\ No newline at end of file
......@@ -2,6 +2,6 @@ import * as tf from '@tensorflow/tfjs-core';
export function bgrToRgbTensor(tensor: tf.Tensor4D): tf.Tensor4D {
return tf.tidy(
() => tf.stack(tf.unstack(tensor, 3), 3)
() => tf.stack(tf.unstack(tensor, 3).reverse(), 3)
) as tf.Tensor4D
}
\ No newline at end of file
import * as tf from '@tensorflow/tfjs-core';
/**
 * Computes `(x - 127.5) * 0.0078125` element-wise (0.0078125 === 1 / 128).
 * Presumably meant for 0..255 pixel values, which this maps to roughly [-1, 1].
 *
 * @param x Input tensor.
 * @returns A new tensor with the shifted and scaled values; intermediates are
 * cleaned up by `tf.tidy`.
 */
export function normalize(x: tf.Tensor4D): tf.Tensor4D {
  return tf.tidy(() => {
    const offset = tf.scalar(127.5)
    const centered: tf.Tensor4D = tf.sub(x, offset)
    const scale = tf.scalar(0.0078125)
    return tf.mul(centered, scale)
  })
}
\ No newline at end of file
import * as tf from '@tensorflow/tfjs-core';
export function prelu(x: tf.Tensor4D, alpha: tf.Tensor1D): tf.Tensor4D {
export function prelu<T extends tf.Tensor>(x: T, alpha: tf.Tensor1D): T {
return tf.tidy(() =>
tf.add(
tf.relu(x),
......
......@@ -8,13 +8,13 @@ export function sharedLayer(x: tf.Tensor4D, params: SharedParams, isPnet: boolea
return tf.tidy(() => {
let out = convLayer(x, params.conv1, 'valid')
out = prelu(out, params.prelu1_alpha)
out = prelu<tf.Tensor4D>(out, params.prelu1_alpha)
out = tf.maxPool(out, isPnet ? [2, 2]: [3, 3], [2, 2], 'same')
out = convLayer(out, params.conv2, 'valid')
out = prelu(out, params.prelu2_alpha)
out = prelu<tf.Tensor4D>(out, params.prelu2_alpha)
out = isPnet ? out : tf.maxPool(out, [3, 3], [2, 2], 'valid')
out = convLayer(out, params.conv3, 'valid')
out = prelu(out, params.prelu3_alpha)
out = prelu<tf.Tensor4D>(out, params.prelu3_alpha)
return out
})
......
......@@ -79,13 +79,12 @@ export function stage1(
const { prob, regions } = PNet(resized, params)
const scores = tf.unstack(prob, 3)[1]
const [sh, sw] = scores.shape.slice(1)
const [rh, rw] = regions.shape.slice(1)
const scoresTensor = tf.unstack(tf.unstack(prob, 3)[1])[0] as tf.Tensor2D
const regionsTensor = tf.unstack(regions)[0] as tf.Tensor3D
return {
scoresTensor: scores.as2D(sh, sw),
regionsTensor: regions.as3D(rh, rw, 4)
scoresTensor,
regionsTensor
}
})
......
import * as tf from '@tensorflow/tfjs-core';
import { createCanvas, getContext2dOrThrow } from '../utils';
import { bgrToRgbTensor } from './bgrToRgbTensor';
import { BoundingBox } from './BoundingBox';
import { nms } from './nms';
import { normalize } from './normalize';
import { RNet } from './RNet';
import { RNetParams } from './types';
/**
 * Second MTCNN stage: scores and refines the candidate boxes with RNet.
 *
 * For every candidate box the corresponding region is read from the canvas,
 * resized to 24x24 and fed through RNet as one batch. Boxes whose score does
 * not exceed `scoreThreshold` are dropped, the rest is reduced via `nms`
 * (third argument 0.7) and the surviving boxes are shifted by the regressed
 * offsets (scaled by the box width/height), squared and rounded.
 *
 * @param img Canvas holding the full input image; patches are extracted from it.
 * @param boxes Candidate boxes to refine.
 * @param scoreThreshold Minimum (exclusive) RNet score a box needs to be kept.
 * @param params RNet weights.
 * @returns The refined boxes and their RNet scores, index-aligned. Both arrays
 * are empty when `boxes` is empty or no box passes the threshold.
 */
export async function stage2(
  img: HTMLCanvasElement,
  boxes: { box: BoundingBox, score: number }[],
  scoreThreshold: number,
  params: RNetParams
) {
  let finalBoxes: BoundingBox[] = []
  let finalScores: number[] = []

  // nothing to refine: skip the canvas work and do not run RNet on a zero-sized batch
  if (boxes.length === 0) {
    return {
      finalBoxes,
      finalScores
    }
  }

  const { height, width } = img
  const imgCtx = getContext2dOrThrow(img)

  const bitmaps = await Promise.all(boxes.map(async ({ box }) => {
    // TODO: correct padding
    const { y, ey, x, ex } = box.padAtBorders(height, width)
    // padAtBorders clamps to 1-based coordinates, getImageData is 0-based
    const fromX = x - 1
    const fromY = y - 1
    const imgData = imgCtx.getImageData(fromX, fromY, (ex - fromX), (ey - fromY))
    return createImageBitmap(imgData)
  }))

  const imagePatchesData: number[] = []
  bitmaps.forEach(bmp => {
    const patch = createCanvas({ width: 24, height: 24 })
    const patchCtx = getContext2dOrThrow(patch)
    // drawing into the 24x24 canvas performs the resize
    patchCtx.drawImage(bmp, 0, 0, 24, 24)
    // the bitmap is no longer needed once drawn, release it right away
    bmp.close()

    const { data } = patchCtx.getImageData(0, 0, 24, 24)
    for (let i = 0; i < data.length; i++) {
      // skip every 4th value (alpha channel), keep the 3 color channels
      if ((i + 1) % 4 === 0) continue
      imagePatchesData.push(data[i])
    }
  })

  const rnetOut = tf.tidy(() => {
    const imagePatchTensor = bgrToRgbTensor(tf.transpose(
      tf.tensor4d(imagePatchesData, [boxes.length, 24, 24, 3]),
      [0, 2, 1, 3]
    ).toFloat()) as tf.Tensor4D

    const normalized = normalize(imagePatchTensor)
    const { prob, regions } = RNet(normalized, params)

    return {
      // column 1 of prob is used as the box score
      scores: tf.unstack(prob, 1)[1],
      regions
    }
  })

  try {
    // download both outputs once instead of reading single region values synchronously
    const [scoresData, regionsData] = await Promise.all([
      rnetOut.scores.data(),
      rnetOut.regions.data()
    ])
    const scores = Array.from(scoresData)
    // regions is a 2D tensor stored row-major: value (row, col) sits at row * stride + col
    const regionStride = rnetOut.regions.shape[1]

    // indices (into boxes / RNet output rows) of the boxes passing the threshold
    const indices = scores
      .map((score, idx) => ({ score, idx }))
      .filter(c => c.score > scoreThreshold)
      .map(({ idx }) => idx)

    const filteredBoxes = indices.map(idx => boxes[idx].box)
    const filteredScores = indices.map(idx => scores[idx])

    if (filteredBoxes.length > 0) {
      // indicesNms index into filteredBoxes / filteredScores
      const indicesNms = nms(
        filteredBoxes,
        filteredScores,
        0.7
      )

      finalScores = indicesNms.map(idx => filteredScores[idx])
      finalBoxes = indicesNms
        .map(idx => {
          const box = filteredBoxes[idx]
          // map back to the row of the original RNet output
          const offset = indices[idx] * regionStride
          const rleft = regionsData[offset]
          const rtop = regionsData[offset + 1]
          const rright = regionsData[offset + 2]
          const rbottom = regionsData[offset + 3]

          return new BoundingBox(
            box.left + (rleft * box.width),
            box.top + (rtop * box.height),
            box.right + (rright * box.width),
            box.bottom + (rbottom * box.height)
          ).toSquare().round()
        })
    }

    return {
      finalBoxes,
      finalScores
    }
  } finally {
    // release the RNet outputs even if filtering / nms throws
    rnetOut.regions.dispose()
    rnetOut.scores.dispose()
  }
}
\ No newline at end of file
......@@ -17,7 +17,8 @@ import { awaitMediaLoaded, resolveInput } from './utils';
*/
export async function toNetInput(
inputs: TNetInput,
manageCreatedInput: boolean = false
manageCreatedInput: boolean = false,
keepCanvases: boolean = false
): Promise<NetInput> {
if (inputs instanceof NetInput) {
return inputs
......@@ -67,5 +68,5 @@ export async function toNetInput(
inputArray.map(input => isMediaElement(input) && awaitMediaLoaded(input))
)
return afterCreate(new NetInput(inputArray, Array.isArray(inputs)))
return afterCreate(new NetInput(inputArray, Array.isArray(inputs), keepCanvases))
}
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment