Commit 08aae43d by vincent

running nets sequentially instead of in batches seems to be faster + gather runtime stats

parent e04770cb
......@@ -10,11 +10,12 @@ import { TNetInput } from '../types';
import { bgrToRgbTensor } from './bgrToRgbTensor';
import { extractParams } from './extractParams';
import { FaceLandmarks5 } from './FaceLandmarks5';
import { getSizesForScale } from './getSizesForScale';
import { pyramidDown } from './pyramidDown';
import { stage1 } from './stage1';
import { stage2 } from './stage2';
import { stage3 } from './stage3';
import { NetParams } from './types';
import { MtcnnResult, NetParams } from './types';
export class Mtcnn extends NeuralNetwork<NetParams> {
......@@ -26,8 +27,9 @@ export class Mtcnn extends NeuralNetwork<NetParams> {
input: NetInput,
minFaceSize: number = 20,
scaleFactor: number = 0.709,
maxNumScales: number = 10,
scoreThresholds: number[] = [0.6, 0.7, 0.7]
): Promise<any> {
): Promise<{ results: MtcnnResult[], stats: any }> {
const { params } = this
......@@ -42,6 +44,10 @@ export class Mtcnn extends NeuralNetwork<NetParams> {
throw new Error('Mtcnn - inputCanvas is not defined, note that passing tensors into Mtcnn.forwardInput is not supported yet.')
}
const stats: any = {}
const tsTotal = Date.now()
const imgTensor = tf.tidy(() =>
bgrToRgbTensor(
tf.expandDims(inputTensor).toFloat() as tf.Tensor4D
......@@ -51,18 +57,47 @@ export class Mtcnn extends NeuralNetwork<NetParams> {
const [height, width] = imgTensor.shape.slice(1)
const scales = pyramidDown(minFaceSize, scaleFactor, [height, width])
const out1 = await stage1(imgTensor, scales, scoreThresholds[0], params.pnet)
.filter(scale => {
const sizes = getSizesForScale(scale, [height, width])
return Math.min(sizes.width, sizes.height) > 48
})
.slice(0, maxNumScales)
stats.scales = scales
stats.pyramid = scales.map(scale => getSizesForScale(scale, [height, width]))
let ts = Date.now()
const out1 = await stage1(imgTensor, scales, scoreThresholds[0], params.pnet, stats)
stats.total_stage1 = Date.now() - ts
if (!out1.boxes.length) {
stats.total = Date.now() - tsTotal
return { results: [], stats }
}
stats.stage2_numInputBoxes = out1.boxes.length
// using the inputCanvas to extract and resize the image patches, since it is faster
// than doing this on the gpu
const out2 = await stage2(inputCanvas, out1.boxes, scoreThresholds[1], params.rnet)
const out3 = await stage3(inputCanvas, out2.boxes, scoreThresholds[2], params.onet)
ts = Date.now()
const out2 = await stage2(inputCanvas, out1.boxes, scoreThresholds[1], params.rnet, stats)
stats.total_stage2 = Date.now() - ts
if (!out2.boxes.length) {
stats.total = Date.now() - tsTotal
return { results: [], stats }
}
stats.stage3_numInputBoxes = out2.boxes.length
ts = Date.now()
const out3 = await stage3(inputCanvas, out2.boxes, scoreThresholds[2], params.onet, stats)
stats.total_stage3 = Date.now() - ts
imgTensor.dispose()
input.dispose()
const faceDetections = out3.boxes.map((box, idx) =>
new FaceDetection(
const results = out3.boxes.map((box, idx) => ({
faceDetection: new FaceDetection(
out3.scores[idx],
new Rect(
box.left / width,
......@@ -74,32 +109,47 @@ export class Mtcnn extends NeuralNetwork<NetParams> {
height,
width
}
)
)
const faceLandmarks = out3.points.map(pts =>
new FaceLandmarks5(
pts.map(pt => pt.div(new Point(width, height))),
),
faceLandmarks: new FaceLandmarks5(
out3.points[idx].map(pt => pt.div(new Point(width, height))),
{ width, height }
)
)
}))
return {
faceDetections,
faceLandmarks
}
stats.total = Date.now() - tsTotal
return { results, stats }
}
public async forward(
input: TNetInput,
minFaceSize: number = 20,
scaleFactor: number = 0.709,
maxNumScales: number = 10,
scoreThresholds: number[] = [0.6, 0.7, 0.7]
): Promise<MtcnnResult[]> {
return (
await this.forwardInput(
await toNetInput(input, true, true),
minFaceSize,
scaleFactor,
maxNumScales,
scoreThresholds
)
).results
}
public async forwardWithStats(
  input: TNetInput,
  minFaceSize: number = 20,
  scaleFactor: number = 0.709,
  maxNumScales: number = 10,
  scoreThresholds: number[] = [0.6, 0.7, 0.7]
): Promise<{ results: MtcnnResult[], stats: any }> {
  // Same as forward(), but additionally exposes the per-stage runtime
  // stats (scale pyramid, per-stage timings, nms timings) that
  // forwardInput gathers while running the three MTCNN stages.
  return this.forwardInput(
    await toNetInput(input, true, true),
    minFaceSize,
    scaleFactor,
    maxNumScales,
    scoreThresholds
  )
}
......
......@@ -10,7 +10,7 @@ export async function extractImagePatches(
img: HTMLCanvasElement,
boxes: BoundingBox[],
{ width, height }: Dimensions
): Promise<tf.Tensor4D> {
): Promise<tf.Tensor4D[]> {
const imgCtx = getContext2dOrThrow(img)
......@@ -26,7 +26,7 @@ export async function extractImagePatches(
return createImageBitmap(imgData)
}))
const imagePatchesData: number[] = []
const imagePatchesDatas: number[][] = []
bitmaps.forEach(bmp => {
const patch = createCanvas({ width, height })
......@@ -34,18 +34,24 @@ export async function extractImagePatches(
patchCtx.drawImage(bmp, 0, 0, width, height)
const { data } = patchCtx.getImageData(0, 0, width, height)
const currData = []
for(let i = 0; i < data.length; i++) {
if ((i + 1) % 4 === 0) continue
imagePatchesData.push(data[i])
currData.push(data[i])
}
imagePatchesDatas.push(currData)
})
return tf.tidy(() => {
const imagePatchTensor = bgrToRgbTensor(tf.transpose(
tf.tensor4d(imagePatchesData, [boxes.length, width, height, 3]),
[0, 2, 1, 3]
).toFloat()) as tf.Tensor4D
return normalize(imagePatchTensor)
return imagePatchesDatas.map(data => {
const t = tf.tidy(() => {
const imagePatchTensor = bgrToRgbTensor(tf.transpose(
tf.tensor4d(data, [1, width, height, 3]),
[0, 2, 1, 3]
).toFloat()) as tf.Tensor4D
return normalize(imagePatchTensor)
})
return t
})
}
\ No newline at end of file
// Computes the image dimensions obtained by scaling [height, width]
// by the given pyramid scale factor, flooring to whole pixels.
export function getSizesForScale(scale: number, dimensions: number[]) {
  const [height, width] = dimensions
  return {
    height: Math.floor(height * scale),
    width: Math.floor(width * scale)
  }
}
\ No newline at end of file
......@@ -7,12 +7,13 @@ import { nms } from './nms';
import { normalize } from './normalize';
import { PNet } from './PNet';
import { PNetParams } from './types';
import { getSizesForScale } from './getSizesForScale';
function rescaleAndNormalize(x: tf.Tensor4D, scale: number): tf.Tensor4D {
return tf.tidy(() => {
const [height, width] = x.shape.slice(1)
const resized = tf.image.resizeBilinear(x, [Math.floor(height * scale), Math.floor(width * scale)])
const { height, width } = getSizesForScale(scale, x.shape.slice(1))
const resized = tf.image.resizeBilinear(x, [height, width])
const normalized = normalize(resized)
return (tf.transpose(normalized, [0, 2, 1, 3]) as tf.Tensor4D)
......@@ -67,17 +68,20 @@ export function stage1(
imgTensor: tf.Tensor4D,
scales: number[],
scoreThreshold: number,
params: PNetParams
params: PNetParams,
stats: any
) {
stats.stage1 = []
const boxesForScale = scales.map((scale, i) => {
const boxesForScale = scales.map((scale) => {
const statsForScale: any = { scale }
const { scoresTensor, regionsTensor } = tf.tidy(() => {
const resized = rescaleAndNormalize(imgTensor, scale)
let ts = Date.now()
const { prob, regions } = PNet(resized, params)
statsForScale.pnet = Date.now() - ts
const scoresTensor = tf.unstack(tf.unstack(prob, 3)[1])[0] as tf.Tensor2D
const regionsTensor = tf.unstack(regions)[0] as tf.Tensor3D
......@@ -99,15 +103,20 @@ export function stage1(
regionsTensor.dispose()
if (!boundingBoxes.length) {
stats.stage1.push(statsForScale)
return []
}
let ts = Date.now()
const indices = nms(
boundingBoxes.map(bbox => bbox.cell),
boundingBoxes.map(bbox => bbox.score),
0.5
)
statsForScale.nms = Date.now() - ts
statsForScale.numBoxes = indices.length
stats.stage1.push(statsForScale)
return indices.map(boxIdx => boundingBoxes[boxIdx])
})
......@@ -119,11 +128,13 @@ export function stage1(
let finalScores: number[] = []
if (allBoxes.length > 0) {
let ts = Date.now()
const indices = nms(
allBoxes.map(bbox => bbox.cell),
allBoxes.map(bbox => bbox.score),
0.7
)
stats.stage1_nms = Date.now() - ts
finalScores = indices.map(idx => allBoxes[idx].score)
finalBoxes = indices
......
......@@ -8,15 +8,26 @@ export async function stage2(
img: HTMLCanvasElement,
inputBoxes: BoundingBox[],
scoreThreshold: number,
params: RNetParams
params: RNetParams,
stats: any
) {
const rnetInput = await extractImagePatches(img, inputBoxes, { width: 24, height: 24 })
const rnetOut = RNet(rnetInput, params)
let ts = Date.now()
const rnetInputs = await extractImagePatches(img, inputBoxes, { width: 24, height: 24 })
stats.stage2_extractImagePatches = Date.now() - ts
rnetInput.dispose()
ts = Date.now()
const rnetOuts = rnetInputs.map(
rnetInput => {
const out = RNet(rnetInput, params)
rnetInput.dispose()
return out
}
)
stats.stage2_rnet = Date.now() - ts
const scores = Array.from(await rnetOut.scores.data())
const scoreDatas = await Promise.all(rnetOuts.map(out => out.scores.data()))
const scores = scoreDatas.map(arr => Array.from(arr)).reduce((all, arr) => all.concat(arr))
const indices = scores
.map((score, idx) => ({ score, idx }))
.filter(c => c.score > scoreThreshold)
......@@ -29,18 +40,20 @@ export async function stage2(
let finalScores: number[] = []
if (filteredBoxes.length > 0) {
ts = Date.now()
const indicesNms = nms(
filteredBoxes,
filteredScores,
0.7
)
stats.stage2_nms = Date.now() - ts
const regions = indicesNms.map(idx =>
new BoundingBox(
rnetOut.regions.get(indices[idx], 0),
rnetOut.regions.get(indices[idx], 1),
rnetOut.regions.get(indices[idx], 2),
rnetOut.regions.get(indices[idx], 3)
rnetOuts[indices[idx]].regions.get(0, 0),
rnetOuts[indices[idx]].regions.get(0, 1),
rnetOuts[indices[idx]].regions.get(0, 2),
rnetOuts[indices[idx]].regions.get(0, 3)
)
)
......@@ -48,8 +61,10 @@ export async function stage2(
finalBoxes = indicesNms.map((idx, i) => filteredBoxes[idx].calibrate(regions[i]))
}
rnetOut.regions.dispose()
rnetOut.scores.dispose()
rnetOuts.forEach(t => {
t.regions.dispose()
t.scores.dispose()
})
return {
boxes: finalBoxes,
......
......@@ -9,25 +9,36 @@ export async function stage3(
img: HTMLCanvasElement,
inputBoxes: BoundingBox[],
scoreThreshold: number,
params: ONetParams
params: ONetParams,
stats: any
) {
const onetInput = await extractImagePatches(img, inputBoxes, { width: 48, height: 48 })
const onetOut = ONet(onetInput, params)
let ts = Date.now()
const onetInputs = await extractImagePatches(img, inputBoxes, { width: 48, height: 48 })
stats.stage3_extractImagePatches = Date.now() - ts
onetInput.dispose()
ts = Date.now()
const onetOuts = onetInputs.map(
onetInput => {
const out = ONet(onetInput, params)
onetInput.dispose()
return out
}
)
stats.stage3_onet = Date.now() - ts
const scores = Array.from(await onetOut.scores.data())
const scoreDatas = await Promise.all(onetOuts.map(out => out.scores.data()))
const scores = scoreDatas.map(arr => Array.from(arr)).reduce((all, arr) => all.concat(arr))
const indices = scores
.map((score, idx) => ({ score, idx }))
.filter(c => c.score > scoreThreshold)
.map(({ idx }) => idx)
const filteredRegions = indices.map(idx => new BoundingBox(
onetOut.regions.get(idx, 0),
onetOut.regions.get(idx, 1),
onetOut.regions.get(idx, 2),
onetOut.regions.get(idx, 3)
onetOuts[idx].regions.get(0, 0),
onetOuts[idx].regions.get(0, 1),
onetOuts[idx].regions.get(0, 2),
onetOuts[idx].regions.get(0, 3)
))
const filteredBoxes = indices
.map((idx, i) => inputBoxes[idx].calibrate(filteredRegions[i]))
......@@ -39,28 +50,32 @@ export async function stage3(
if (filteredBoxes.length > 0) {
ts = Date.now()
const indicesNms = nms(
filteredBoxes,
filteredScores,
0.7,
false
)
stats.stage3_nms = Date.now() - ts
finalBoxes = indicesNms.map(idx => filteredBoxes[idx])
finalScores = indicesNms.map(idx => filteredScores[idx])
points = indicesNms.map((idx, i) =>
Array(5).fill(0).map((_, ptIdx) =>
new Point(
((onetOut.points.get(idx, ptIdx) * (finalBoxes[i].width + 1)) + finalBoxes[i].left) ,
((onetOut.points.get(idx, ptIdx + 5) * (finalBoxes[i].height + 1)) + finalBoxes[i].top)
((onetOuts[idx].points.get(0, ptIdx) * (finalBoxes[i].width + 1)) + finalBoxes[i].left) ,
((onetOuts[idx].points.get(0, ptIdx + 5) * (finalBoxes[i].height + 1)) + finalBoxes[i].top)
)
)
)
}
onetOut.regions.dispose()
onetOut.scores.dispose()
onetOut.points.dispose()
onetOuts.forEach(t => {
t.regions.dispose()
t.scores.dispose()
t.points.dispose()
})
return {
boxes: finalBoxes,
......
import { tf } from '..';
import { ConvParams, FCParams } from '../commons/types';
import { BoundingBox } from './BoundingBox';
import { FaceDetection } from '../faceDetectionNet/FaceDetection';
import { FaceLandmarks5 } from './FaceLandmarks5';
export type SharedParams = {
conv1: ConvParams
......@@ -37,4 +38,9 @@ export type NetParams = {
pnet: PNetParams
rnet: RNetParams
onet: ONetParams
}
// A single detected face produced by the MTCNN pipeline: the face's
// bounding box + score (FaceDetection) together with its 5-point
// facial landmarks (FaceLandmarks5), both in relative coordinates
// (divided by image width/height in forwardInput).
export type MtcnnResult = {
  faceDetection: FaceDetection,
  faceLandmarks: FaceLandmarks5
}
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment