Commit 08aae43d by vincent

running nets sequentially instead of in batches seems to be faster + gather runtime stats

parent e04770cb
...@@ -10,11 +10,12 @@ import { TNetInput } from '../types'; ...@@ -10,11 +10,12 @@ import { TNetInput } from '../types';
import { bgrToRgbTensor } from './bgrToRgbTensor'; import { bgrToRgbTensor } from './bgrToRgbTensor';
import { extractParams } from './extractParams'; import { extractParams } from './extractParams';
import { FaceLandmarks5 } from './FaceLandmarks5'; import { FaceLandmarks5 } from './FaceLandmarks5';
import { getSizesForScale } from './getSizesForScale';
import { pyramidDown } from './pyramidDown'; import { pyramidDown } from './pyramidDown';
import { stage1 } from './stage1'; import { stage1 } from './stage1';
import { stage2 } from './stage2'; import { stage2 } from './stage2';
import { stage3 } from './stage3'; import { stage3 } from './stage3';
import { NetParams } from './types'; import { MtcnnResult, NetParams } from './types';
export class Mtcnn extends NeuralNetwork<NetParams> { export class Mtcnn extends NeuralNetwork<NetParams> {
...@@ -26,8 +27,9 @@ export class Mtcnn extends NeuralNetwork<NetParams> { ...@@ -26,8 +27,9 @@ export class Mtcnn extends NeuralNetwork<NetParams> {
input: NetInput, input: NetInput,
minFaceSize: number = 20, minFaceSize: number = 20,
scaleFactor: number = 0.709, scaleFactor: number = 0.709,
maxNumScales: number = 10,
scoreThresholds: number[] = [0.6, 0.7, 0.7] scoreThresholds: number[] = [0.6, 0.7, 0.7]
): Promise<any> { ): Promise<{ results: MtcnnResult[], stats: any }> {
const { params } = this const { params } = this
...@@ -42,6 +44,10 @@ export class Mtcnn extends NeuralNetwork<NetParams> { ...@@ -42,6 +44,10 @@ export class Mtcnn extends NeuralNetwork<NetParams> {
throw new Error('Mtcnn - inputCanvas is not defined, note that passing tensors into Mtcnn.forwardInput is not supported yet.') throw new Error('Mtcnn - inputCanvas is not defined, note that passing tensors into Mtcnn.forwardInput is not supported yet.')
} }
const stats: any = {}
const tsTotal = Date.now()
const imgTensor = tf.tidy(() => const imgTensor = tf.tidy(() =>
bgrToRgbTensor( bgrToRgbTensor(
tf.expandDims(inputTensor).toFloat() as tf.Tensor4D tf.expandDims(inputTensor).toFloat() as tf.Tensor4D
...@@ -51,18 +57,47 @@ export class Mtcnn extends NeuralNetwork<NetParams> { ...@@ -51,18 +57,47 @@ export class Mtcnn extends NeuralNetwork<NetParams> {
const [height, width] = imgTensor.shape.slice(1) const [height, width] = imgTensor.shape.slice(1)
const scales = pyramidDown(minFaceSize, scaleFactor, [height, width]) const scales = pyramidDown(minFaceSize, scaleFactor, [height, width])
const out1 = await stage1(imgTensor, scales, scoreThresholds[0], params.pnet) .filter(scale => {
const sizes = getSizesForScale(scale, [height, width])
return Math.min(sizes.width, sizes.height) > 48
})
.slice(0, maxNumScales)
stats.scales = scales
stats.pyramid = scales.map(scale => getSizesForScale(scale, [height, width]))
let ts = Date.now()
const out1 = await stage1(imgTensor, scales, scoreThresholds[0], params.pnet, stats)
stats.total_stage1 = Date.now() - ts
if (!out1.boxes.length) {
stats.total = Date.now() - tsTotal
return { results: [], stats }
}
stats.stage2_numInputBoxes = out1.boxes.length
// using the inputCanvas to extract and resize the image patches, since it is faster // using the inputCanvas to extract and resize the image patches, since it is faster
// than doing this on the gpu // than doing this on the gpu
const out2 = await stage2(inputCanvas, out1.boxes, scoreThresholds[1], params.rnet) ts = Date.now()
const out3 = await stage3(inputCanvas, out2.boxes, scoreThresholds[2], params.onet) const out2 = await stage2(inputCanvas, out1.boxes, scoreThresholds[1], params.rnet, stats)
stats.total_stage2 = Date.now() - ts
if (!out2.boxes.length) {
stats.total = Date.now() - tsTotal
return { results: [], stats }
}
stats.stage3_numInputBoxes = out2.boxes.length
ts = Date.now()
const out3 = await stage3(inputCanvas, out2.boxes, scoreThresholds[2], params.onet, stats)
stats.total_stage3 = Date.now() - ts
imgTensor.dispose() imgTensor.dispose()
input.dispose() input.dispose()
const faceDetections = out3.boxes.map((box, idx) => const results = out3.boxes.map((box, idx) => ({
new FaceDetection( faceDetection: new FaceDetection(
out3.scores[idx], out3.scores[idx],
new Rect( new Rect(
box.left / width, box.left / width,
...@@ -74,32 +109,47 @@ export class Mtcnn extends NeuralNetwork<NetParams> { ...@@ -74,32 +109,47 @@ export class Mtcnn extends NeuralNetwork<NetParams> {
height, height,
width width
} }
) ),
) faceLandmarks: new FaceLandmarks5(
out3.points[idx].map(pt => pt.div(new Point(width, height))),
const faceLandmarks = out3.points.map(pts =>
new FaceLandmarks5(
pts.map(pt => pt.div(new Point(width, height))),
{ width, height } { width, height }
) )
) }))
return { stats.total = Date.now() - tsTotal
faceDetections, return { results, stats }
faceLandmarks
}
} }
public async forward( public async forward(
input: TNetInput, input: TNetInput,
minFaceSize: number = 20, minFaceSize: number = 20,
scaleFactor: number = 0.709, scaleFactor: number = 0.709,
maxNumScales: number = 10,
scoreThresholds: number[] = [0.6, 0.7, 0.7]
): Promise<MtcnnResult[]> {
return (
await this.forwardInput(
await toNetInput(input, true, true),
minFaceSize,
scaleFactor,
maxNumScales,
scoreThresholds
)
).results
}
public async forwardWithStats(
input: TNetInput,
minFaceSize: number = 20,
scaleFactor: number = 0.709,
maxNumScales: number = 10,
scoreThresholds: number[] = [0.6, 0.7, 0.7] scoreThresholds: number[] = [0.6, 0.7, 0.7]
): Promise<tf.Tensor2D> { ): Promise<{ results: MtcnnResult[], stats: any }> {
return this.forwardInput( return this.forwardInput(
await toNetInput(input, true, true), await toNetInput(input, true, true),
minFaceSize, minFaceSize,
scaleFactor, scaleFactor,
maxNumScales,
scoreThresholds scoreThresholds
) )
} }
......
...@@ -10,7 +10,7 @@ export async function extractImagePatches( ...@@ -10,7 +10,7 @@ export async function extractImagePatches(
img: HTMLCanvasElement, img: HTMLCanvasElement,
boxes: BoundingBox[], boxes: BoundingBox[],
{ width, height }: Dimensions { width, height }: Dimensions
): Promise<tf.Tensor4D> { ): Promise<tf.Tensor4D[]> {
const imgCtx = getContext2dOrThrow(img) const imgCtx = getContext2dOrThrow(img)
...@@ -26,7 +26,7 @@ export async function extractImagePatches( ...@@ -26,7 +26,7 @@ export async function extractImagePatches(
return createImageBitmap(imgData) return createImageBitmap(imgData)
})) }))
const imagePatchesData: number[] = [] const imagePatchesDatas: number[][] = []
bitmaps.forEach(bmp => { bitmaps.forEach(bmp => {
const patch = createCanvas({ width, height }) const patch = createCanvas({ width, height })
...@@ -34,18 +34,24 @@ export async function extractImagePatches( ...@@ -34,18 +34,24 @@ export async function extractImagePatches(
patchCtx.drawImage(bmp, 0, 0, width, height) patchCtx.drawImage(bmp, 0, 0, width, height)
const { data } = patchCtx.getImageData(0, 0, width, height) const { data } = patchCtx.getImageData(0, 0, width, height)
const currData = []
for(let i = 0; i < data.length; i++) { for(let i = 0; i < data.length; i++) {
if ((i + 1) % 4 === 0) continue if ((i + 1) % 4 === 0) continue
imagePatchesData.push(data[i]) currData.push(data[i])
} }
imagePatchesDatas.push(currData)
}) })
return tf.tidy(() => {
return imagePatchesDatas.map(data => {
const t = tf.tidy(() => {
const imagePatchTensor = bgrToRgbTensor(tf.transpose( const imagePatchTensor = bgrToRgbTensor(tf.transpose(
tf.tensor4d(imagePatchesData, [boxes.length, width, height, 3]), tf.tensor4d(data, [1, width, height, 3]),
[0, 2, 1, 3] [0, 2, 1, 3]
).toFloat()) as tf.Tensor4D ).toFloat()) as tf.Tensor4D
return normalize(imagePatchTensor) return normalize(imagePatchTensor)
}) })
return t
})
} }
\ No newline at end of file
/**
 * Computes the dimensions of an image pyramid level.
 *
 * @param scale Pyramid scale factor applied to both dimensions.
 * @param [height, width] Original image dimensions in pixels.
 * @returns Scaled dimensions, truncated to whole pixels via Math.floor.
 */
export function getSizesForScale(scale: number, [height, width]: number[]) {
  const [scaledHeight, scaledWidth] = [height, width].map(dim => Math.floor(dim * scale))
  return { height: scaledHeight, width: scaledWidth }
}
\ No newline at end of file
...@@ -7,12 +7,13 @@ import { nms } from './nms'; ...@@ -7,12 +7,13 @@ import { nms } from './nms';
import { normalize } from './normalize'; import { normalize } from './normalize';
import { PNet } from './PNet'; import { PNet } from './PNet';
import { PNetParams } from './types'; import { PNetParams } from './types';
import { getSizesForScale } from './getSizesForScale';
function rescaleAndNormalize(x: tf.Tensor4D, scale: number): tf.Tensor4D { function rescaleAndNormalize(x: tf.Tensor4D, scale: number): tf.Tensor4D {
return tf.tidy(() => { return tf.tidy(() => {
const [height, width] = x.shape.slice(1) const { height, width } = getSizesForScale(scale, x.shape.slice(1))
const resized = tf.image.resizeBilinear(x, [Math.floor(height * scale), Math.floor(width * scale)]) const resized = tf.image.resizeBilinear(x, [height, width])
const normalized = normalize(resized) const normalized = normalize(resized)
return (tf.transpose(normalized, [0, 2, 1, 3]) as tf.Tensor4D) return (tf.transpose(normalized, [0, 2, 1, 3]) as tf.Tensor4D)
...@@ -67,17 +68,20 @@ export function stage1( ...@@ -67,17 +68,20 @@ export function stage1(
imgTensor: tf.Tensor4D, imgTensor: tf.Tensor4D,
scales: number[], scales: number[],
scoreThreshold: number, scoreThreshold: number,
params: PNetParams params: PNetParams,
stats: any
) { ) {
stats.stage1 = []
const boxesForScale = scales.map((scale, i) => { const boxesForScale = scales.map((scale) => {
const statsForScale: any = { scale }
const { scoresTensor, regionsTensor } = tf.tidy(() => { const { scoresTensor, regionsTensor } = tf.tidy(() => {
const resized = rescaleAndNormalize(imgTensor, scale) const resized = rescaleAndNormalize(imgTensor, scale)
let ts = Date.now()
const { prob, regions } = PNet(resized, params) const { prob, regions } = PNet(resized, params)
statsForScale.pnet = Date.now() - ts
const scoresTensor = tf.unstack(tf.unstack(prob, 3)[1])[0] as tf.Tensor2D const scoresTensor = tf.unstack(tf.unstack(prob, 3)[1])[0] as tf.Tensor2D
const regionsTensor = tf.unstack(regions)[0] as tf.Tensor3D const regionsTensor = tf.unstack(regions)[0] as tf.Tensor3D
...@@ -99,15 +103,20 @@ export function stage1( ...@@ -99,15 +103,20 @@ export function stage1(
regionsTensor.dispose() regionsTensor.dispose()
if (!boundingBoxes.length) { if (!boundingBoxes.length) {
stats.stage1.push(statsForScale)
return [] return []
} }
let ts = Date.now()
const indices = nms( const indices = nms(
boundingBoxes.map(bbox => bbox.cell), boundingBoxes.map(bbox => bbox.cell),
boundingBoxes.map(bbox => bbox.score), boundingBoxes.map(bbox => bbox.score),
0.5 0.5
) )
statsForScale.nms = Date.now() - ts
statsForScale.numBoxes = indices.length
stats.stage1.push(statsForScale)
return indices.map(boxIdx => boundingBoxes[boxIdx]) return indices.map(boxIdx => boundingBoxes[boxIdx])
}) })
...@@ -119,11 +128,13 @@ export function stage1( ...@@ -119,11 +128,13 @@ export function stage1(
let finalScores: number[] = [] let finalScores: number[] = []
if (allBoxes.length > 0) { if (allBoxes.length > 0) {
let ts = Date.now()
const indices = nms( const indices = nms(
allBoxes.map(bbox => bbox.cell), allBoxes.map(bbox => bbox.cell),
allBoxes.map(bbox => bbox.score), allBoxes.map(bbox => bbox.score),
0.7 0.7
) )
stats.stage1_nms = Date.now() - ts
finalScores = indices.map(idx => allBoxes[idx].score) finalScores = indices.map(idx => allBoxes[idx].score)
finalBoxes = indices finalBoxes = indices
......
...@@ -8,15 +8,26 @@ export async function stage2( ...@@ -8,15 +8,26 @@ export async function stage2(
img: HTMLCanvasElement, img: HTMLCanvasElement,
inputBoxes: BoundingBox[], inputBoxes: BoundingBox[],
scoreThreshold: number, scoreThreshold: number,
params: RNetParams params: RNetParams,
stats: any
) { ) {
const rnetInput = await extractImagePatches(img, inputBoxes, { width: 24, height: 24 }) let ts = Date.now()
const rnetOut = RNet(rnetInput, params) const rnetInputs = await extractImagePatches(img, inputBoxes, { width: 24, height: 24 })
stats.stage2_extractImagePatches = Date.now() - ts
ts = Date.now()
const rnetOuts = rnetInputs.map(
rnetInput => {
const out = RNet(rnetInput, params)
rnetInput.dispose() rnetInput.dispose()
return out
}
)
stats.stage2_rnet = Date.now() - ts
const scores = Array.from(await rnetOut.scores.data()) const scoreDatas = await Promise.all(rnetOuts.map(out => out.scores.data()))
const scores = scoreDatas.map(arr => Array.from(arr)).reduce((all, arr) => all.concat(arr))
const indices = scores const indices = scores
.map((score, idx) => ({ score, idx })) .map((score, idx) => ({ score, idx }))
.filter(c => c.score > scoreThreshold) .filter(c => c.score > scoreThreshold)
...@@ -29,18 +40,20 @@ export async function stage2( ...@@ -29,18 +40,20 @@ export async function stage2(
let finalScores: number[] = [] let finalScores: number[] = []
if (filteredBoxes.length > 0) { if (filteredBoxes.length > 0) {
ts = Date.now()
const indicesNms = nms( const indicesNms = nms(
filteredBoxes, filteredBoxes,
filteredScores, filteredScores,
0.7 0.7
) )
stats.stage2_nms = Date.now() - ts
const regions = indicesNms.map(idx => const regions = indicesNms.map(idx =>
new BoundingBox( new BoundingBox(
rnetOut.regions.get(indices[idx], 0), rnetOuts[indices[idx]].regions.get(0, 0),
rnetOut.regions.get(indices[idx], 1), rnetOuts[indices[idx]].regions.get(0, 1),
rnetOut.regions.get(indices[idx], 2), rnetOuts[indices[idx]].regions.get(0, 2),
rnetOut.regions.get(indices[idx], 3) rnetOuts[indices[idx]].regions.get(0, 3)
) )
) )
...@@ -48,8 +61,10 @@ export async function stage2( ...@@ -48,8 +61,10 @@ export async function stage2(
finalBoxes = indicesNms.map((idx, i) => filteredBoxes[idx].calibrate(regions[i])) finalBoxes = indicesNms.map((idx, i) => filteredBoxes[idx].calibrate(regions[i]))
} }
rnetOut.regions.dispose() rnetOuts.forEach(t => {
rnetOut.scores.dispose() t.regions.dispose()
t.scores.dispose()
})
return { return {
boxes: finalBoxes, boxes: finalBoxes,
......
...@@ -9,25 +9,36 @@ export async function stage3( ...@@ -9,25 +9,36 @@ export async function stage3(
img: HTMLCanvasElement, img: HTMLCanvasElement,
inputBoxes: BoundingBox[], inputBoxes: BoundingBox[],
scoreThreshold: number, scoreThreshold: number,
params: ONetParams params: ONetParams,
stats: any
) { ) {
const onetInput = await extractImagePatches(img, inputBoxes, { width: 48, height: 48 }) let ts = Date.now()
const onetOut = ONet(onetInput, params) const onetInputs = await extractImagePatches(img, inputBoxes, { width: 48, height: 48 })
stats.stage3_extractImagePatches = Date.now() - ts
ts = Date.now()
const onetOuts = onetInputs.map(
onetInput => {
const out = ONet(onetInput, params)
onetInput.dispose() onetInput.dispose()
return out
}
)
stats.stage3_onet = Date.now() - ts
const scores = Array.from(await onetOut.scores.data()) const scoreDatas = await Promise.all(onetOuts.map(out => out.scores.data()))
const scores = scoreDatas.map(arr => Array.from(arr)).reduce((all, arr) => all.concat(arr))
const indices = scores const indices = scores
.map((score, idx) => ({ score, idx })) .map((score, idx) => ({ score, idx }))
.filter(c => c.score > scoreThreshold) .filter(c => c.score > scoreThreshold)
.map(({ idx }) => idx) .map(({ idx }) => idx)
const filteredRegions = indices.map(idx => new BoundingBox( const filteredRegions = indices.map(idx => new BoundingBox(
onetOut.regions.get(idx, 0), onetOuts[idx].regions.get(0, 0),
onetOut.regions.get(idx, 1), onetOuts[idx].regions.get(0, 1),
onetOut.regions.get(idx, 2), onetOuts[idx].regions.get(0, 2),
onetOut.regions.get(idx, 3) onetOuts[idx].regions.get(0, 3)
)) ))
const filteredBoxes = indices const filteredBoxes = indices
.map((idx, i) => inputBoxes[idx].calibrate(filteredRegions[i])) .map((idx, i) => inputBoxes[idx].calibrate(filteredRegions[i]))
...@@ -39,28 +50,32 @@ export async function stage3( ...@@ -39,28 +50,32 @@ export async function stage3(
if (filteredBoxes.length > 0) { if (filteredBoxes.length > 0) {
ts = Date.now()
const indicesNms = nms( const indicesNms = nms(
filteredBoxes, filteredBoxes,
filteredScores, filteredScores,
0.7, 0.7,
false false
) )
stats.stage3_nms = Date.now() - ts
finalBoxes = indicesNms.map(idx => filteredBoxes[idx]) finalBoxes = indicesNms.map(idx => filteredBoxes[idx])
finalScores = indicesNms.map(idx => filteredScores[idx]) finalScores = indicesNms.map(idx => filteredScores[idx])
points = indicesNms.map((idx, i) => points = indicesNms.map((idx, i) =>
Array(5).fill(0).map((_, ptIdx) => Array(5).fill(0).map((_, ptIdx) =>
new Point( new Point(
((onetOut.points.get(idx, ptIdx) * (finalBoxes[i].width + 1)) + finalBoxes[i].left) , ((onetOuts[idx].points.get(0, ptIdx) * (finalBoxes[i].width + 1)) + finalBoxes[i].left) ,
((onetOut.points.get(idx, ptIdx + 5) * (finalBoxes[i].height + 1)) + finalBoxes[i].top) ((onetOuts[idx].points.get(0, ptIdx + 5) * (finalBoxes[i].height + 1)) + finalBoxes[i].top)
) )
) )
) )
} }
onetOut.regions.dispose() onetOuts.forEach(t => {
onetOut.scores.dispose() t.regions.dispose()
onetOut.points.dispose() t.scores.dispose()
t.points.dispose()
})
return { return {
boxes: finalBoxes, boxes: finalBoxes,
......
import { tf } from '..'; import { tf } from '..';
import { ConvParams, FCParams } from '../commons/types'; import { ConvParams, FCParams } from '../commons/types';
import { BoundingBox } from './BoundingBox'; import { FaceDetection } from '../faceDetectionNet/FaceDetection';
import { FaceLandmarks5 } from './FaceLandmarks5';
export type SharedParams = { export type SharedParams = {
conv1: ConvParams conv1: ConvParams
...@@ -38,3 +39,8 @@ export type NetParams = { ...@@ -38,3 +39,8 @@ export type NetParams = {
rnet: RNetParams rnet: RNetParams
onet: ONetParams onet: ONetParams
} }
// Result for a single face produced by the full MTCNN pipeline:
// the scored bounding-box detection (stage 3 output) paired with its
// five facial landmark points (see the Array(5) landmark construction
// in stage3), both expressed relative to the input image dimensions.
export type MtcnnResult = {
  faceDetection: FaceDetection,
  faceLandmarks: FaceLandmarks5
}
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment