Commit 08aae43d by vincent

running nets sequentially instead of in batches seems to be faster + gather runtime stats

parent e04770cb
...@@ -10,11 +10,12 @@ import { TNetInput } from '../types'; ...@@ -10,11 +10,12 @@ import { TNetInput } from '../types';
import { bgrToRgbTensor } from './bgrToRgbTensor'; import { bgrToRgbTensor } from './bgrToRgbTensor';
import { extractParams } from './extractParams'; import { extractParams } from './extractParams';
import { FaceLandmarks5 } from './FaceLandmarks5'; import { FaceLandmarks5 } from './FaceLandmarks5';
import { getSizesForScale } from './getSizesForScale';
import { pyramidDown } from './pyramidDown'; import { pyramidDown } from './pyramidDown';
import { stage1 } from './stage1'; import { stage1 } from './stage1';
import { stage2 } from './stage2'; import { stage2 } from './stage2';
import { stage3 } from './stage3'; import { stage3 } from './stage3';
import { NetParams } from './types'; import { MtcnnResult, NetParams } from './types';
export class Mtcnn extends NeuralNetwork<NetParams> { export class Mtcnn extends NeuralNetwork<NetParams> {
...@@ -26,8 +27,9 @@ export class Mtcnn extends NeuralNetwork<NetParams> { ...@@ -26,8 +27,9 @@ export class Mtcnn extends NeuralNetwork<NetParams> {
input: NetInput, input: NetInput,
minFaceSize: number = 20, minFaceSize: number = 20,
scaleFactor: number = 0.709, scaleFactor: number = 0.709,
maxNumScales: number = 10,
scoreThresholds: number[] = [0.6, 0.7, 0.7] scoreThresholds: number[] = [0.6, 0.7, 0.7]
): Promise<any> { ): Promise<{ results: MtcnnResult[], stats: any }> {
const { params } = this const { params } = this
...@@ -42,6 +44,10 @@ export class Mtcnn extends NeuralNetwork<NetParams> { ...@@ -42,6 +44,10 @@ export class Mtcnn extends NeuralNetwork<NetParams> {
throw new Error('Mtcnn - inputCanvas is not defined, note that passing tensors into Mtcnn.forwardInput is not supported yet.') throw new Error('Mtcnn - inputCanvas is not defined, note that passing tensors into Mtcnn.forwardInput is not supported yet.')
} }
const stats: any = {}
const tsTotal = Date.now()
const imgTensor = tf.tidy(() => const imgTensor = tf.tidy(() =>
bgrToRgbTensor( bgrToRgbTensor(
tf.expandDims(inputTensor).toFloat() as tf.Tensor4D tf.expandDims(inputTensor).toFloat() as tf.Tensor4D
...@@ -51,18 +57,47 @@ export class Mtcnn extends NeuralNetwork<NetParams> { ...@@ -51,18 +57,47 @@ export class Mtcnn extends NeuralNetwork<NetParams> {
const [height, width] = imgTensor.shape.slice(1) const [height, width] = imgTensor.shape.slice(1)
const scales = pyramidDown(minFaceSize, scaleFactor, [height, width]) const scales = pyramidDown(minFaceSize, scaleFactor, [height, width])
const out1 = await stage1(imgTensor, scales, scoreThresholds[0], params.pnet) .filter(scale => {
const sizes = getSizesForScale(scale, [height, width])
return Math.min(sizes.width, sizes.height) > 48
})
.slice(0, maxNumScales)
stats.scales = scales
stats.pyramid = scales.map(scale => getSizesForScale(scale, [height, width]))
let ts = Date.now()
const out1 = await stage1(imgTensor, scales, scoreThresholds[0], params.pnet, stats)
stats.total_stage1 = Date.now() - ts
if (!out1.boxes.length) {
stats.total = Date.now() - tsTotal
return { results: [], stats }
}
stats.stage2_numInputBoxes = out1.boxes.length
// using the inputCanvas to extract and resize the image patches, since it is faster // using the inputCanvas to extract and resize the image patches, since it is faster
// than doing this on the gpu // than doing this on the gpu
const out2 = await stage2(inputCanvas, out1.boxes, scoreThresholds[1], params.rnet) ts = Date.now()
const out3 = await stage3(inputCanvas, out2.boxes, scoreThresholds[2], params.onet) const out2 = await stage2(inputCanvas, out1.boxes, scoreThresholds[1], params.rnet, stats)
stats.total_stage2 = Date.now() - ts
if (!out2.boxes.length) {
stats.total = Date.now() - tsTotal
return { results: [], stats }
}
stats.stage3_numInputBoxes = out2.boxes.length
ts = Date.now()
const out3 = await stage3(inputCanvas, out2.boxes, scoreThresholds[2], params.onet, stats)
stats.total_stage3 = Date.now() - ts
imgTensor.dispose() imgTensor.dispose()
input.dispose() input.dispose()
const faceDetections = out3.boxes.map((box, idx) => const results = out3.boxes.map((box, idx) => ({
new FaceDetection( faceDetection: new FaceDetection(
out3.scores[idx], out3.scores[idx],
new Rect( new Rect(
box.left / width, box.left / width,
...@@ -74,32 +109,47 @@ export class Mtcnn extends NeuralNetwork<NetParams> { ...@@ -74,32 +109,47 @@ export class Mtcnn extends NeuralNetwork<NetParams> {
height, height,
width width
} }
) ),
) faceLandmarks: new FaceLandmarks5(
out3.points[idx].map(pt => pt.div(new Point(width, height))),
const faceLandmarks = out3.points.map(pts =>
new FaceLandmarks5(
pts.map(pt => pt.div(new Point(width, height))),
{ width, height } { width, height }
) )
) }))
return { stats.total = Date.now() - tsTotal
faceDetections, return { results, stats }
faceLandmarks
}
} }
public async forward( public async forward(
input: TNetInput, input: TNetInput,
minFaceSize: number = 20, minFaceSize: number = 20,
scaleFactor: number = 0.709, scaleFactor: number = 0.709,
maxNumScales: number = 10,
scoreThresholds: number[] = [0.6, 0.7, 0.7]
): Promise<MtcnnResult[]> {
return (
await this.forwardInput(
await toNetInput(input, true, true),
minFaceSize,
scaleFactor,
maxNumScales,
scoreThresholds
)
).results
}
public async forwardWithStats(
input: TNetInput,
minFaceSize: number = 20,
scaleFactor: number = 0.709,
maxNumScales: number = 10,
scoreThresholds: number[] = [0.6, 0.7, 0.7] scoreThresholds: number[] = [0.6, 0.7, 0.7]
): Promise<tf.Tensor2D> { ): Promise<{ results: MtcnnResult[], stats: any }> {
return this.forwardInput( return this.forwardInput(
await toNetInput(input, true, true), await toNetInput(input, true, true),
minFaceSize, minFaceSize,
scaleFactor, scaleFactor,
maxNumScales,
scoreThresholds scoreThresholds
) )
} }
......
...@@ -10,7 +10,7 @@ export async function extractImagePatches( ...@@ -10,7 +10,7 @@ export async function extractImagePatches(
img: HTMLCanvasElement, img: HTMLCanvasElement,
boxes: BoundingBox[], boxes: BoundingBox[],
{ width, height }: Dimensions { width, height }: Dimensions
): Promise<tf.Tensor4D> { ): Promise<tf.Tensor4D[]> {
const imgCtx = getContext2dOrThrow(img) const imgCtx = getContext2dOrThrow(img)
...@@ -26,7 +26,7 @@ export async function extractImagePatches( ...@@ -26,7 +26,7 @@ export async function extractImagePatches(
return createImageBitmap(imgData) return createImageBitmap(imgData)
})) }))
const imagePatchesData: number[] = [] const imagePatchesDatas: number[][] = []
bitmaps.forEach(bmp => { bitmaps.forEach(bmp => {
const patch = createCanvas({ width, height }) const patch = createCanvas({ width, height })
...@@ -34,18 +34,24 @@ export async function extractImagePatches( ...@@ -34,18 +34,24 @@ export async function extractImagePatches(
patchCtx.drawImage(bmp, 0, 0, width, height) patchCtx.drawImage(bmp, 0, 0, width, height)
const { data } = patchCtx.getImageData(0, 0, width, height) const { data } = patchCtx.getImageData(0, 0, width, height)
const currData = []
for(let i = 0; i < data.length; i++) { for(let i = 0; i < data.length; i++) {
if ((i + 1) % 4 === 0) continue if ((i + 1) % 4 === 0) continue
imagePatchesData.push(data[i]) currData.push(data[i])
} }
imagePatchesDatas.push(currData)
}) })
return tf.tidy(() => {
return imagePatchesDatas.map(data => {
const t = tf.tidy(() => {
const imagePatchTensor = bgrToRgbTensor(tf.transpose( const imagePatchTensor = bgrToRgbTensor(tf.transpose(
tf.tensor4d(imagePatchesData, [boxes.length, width, height, 3]), tf.tensor4d(data, [1, width, height, 3]),
[0, 2, 1, 3] [0, 2, 1, 3]
).toFloat()) as tf.Tensor4D ).toFloat()) as tf.Tensor4D
return normalize(imagePatchTensor) return normalize(imagePatchTensor)
}) })
return t
})
} }
\ No newline at end of file
/**
 * Computes the dimensions of an image pyramid level.
 *
 * @param scale Pyramid scale factor applied to both dimensions.
 * @param [height, width] Original image dimensions in pixels.
 * @returns Scaled dimensions, truncated to whole pixels via Math.floor.
 */
export function getSizesForScale(scale: number, [height, width]: number[]) {
  const [scaledHeight, scaledWidth] = [height, width].map(dim => Math.floor(dim * scale))
  return { height: scaledHeight, width: scaledWidth }
}
\ No newline at end of file
...@@ -7,12 +7,13 @@ import { nms } from './nms'; ...@@ -7,12 +7,13 @@ import { nms } from './nms';
import { normalize } from './normalize'; import { normalize } from './normalize';
import { PNet } from './PNet'; import { PNet } from './PNet';
import { PNetParams } from './types'; import { PNetParams } from './types';
import { getSizesForScale } from './getSizesForScale';
function rescaleAndNormalize(x: tf.Tensor4D, scale: number): tf.Tensor4D { function rescaleAndNormalize(x: tf.Tensor4D, scale: number): tf.Tensor4D {
return tf.tidy(() => { return tf.tidy(() => {
const [height, width] = x.shape.slice(1) const { height, width } = getSizesForScale(scale, x.shape.slice(1))
const resized = tf.image.resizeBilinear(x, [Math.floor(height * scale), Math.floor(width * scale)]) const resized = tf.image.resizeBilinear(x, [height, width])
const normalized = normalize(resized) const normalized = normalize(resized)
return (tf.transpose(normalized, [0, 2, 1, 3]) as tf.Tensor4D) return (tf.transpose(normalized, [0, 2, 1, 3]) as tf.Tensor4D)
...@@ -67,17 +68,20 @@ export function stage1( ...@@ -67,17 +68,20 @@ export function stage1(
imgTensor: tf.Tensor4D, imgTensor: tf.Tensor4D,
scales: number[], scales: number[],
scoreThreshold: number, scoreThreshold: number,
params: PNetParams params: PNetParams,
stats: any
) { ) {
stats.stage1 = []
const boxesForScale = scales.map((scale, i) => { const boxesForScale = scales.map((scale) => {
const statsForScale: any = { scale }
const { scoresTensor, regionsTensor } = tf.tidy(() => { const { scoresTensor, regionsTensor } = tf.tidy(() => {
const resized = rescaleAndNormalize(imgTensor, scale) const resized = rescaleAndNormalize(imgTensor, scale)
let ts = Date.now()
const { prob, regions } = PNet(resized, params) const { prob, regions } = PNet(resized, params)
statsForScale.pnet = Date.now() - ts
const scoresTensor = tf.unstack(tf.unstack(prob, 3)[1])[0] as tf.Tensor2D const scoresTensor = tf.unstack(tf.unstack(prob, 3)[1])[0] as tf.Tensor2D
const regionsTensor = tf.unstack(regions)[0] as tf.Tensor3D const regionsTensor = tf.unstack(regions)[0] as tf.Tensor3D
...@@ -99,15 +103,20 @@ export function stage1( ...@@ -99,15 +103,20 @@ export function stage1(
regionsTensor.dispose() regionsTensor.dispose()
if (!boundingBoxes.length) { if (!boundingBoxes.length) {
stats.stage1.push(statsForScale)
return [] return []
} }
let ts = Date.now()
const indices = nms( const indices = nms(
boundingBoxes.map(bbox => bbox.cell), boundingBoxes.map(bbox => bbox.cell),
boundingBoxes.map(bbox => bbox.score), boundingBoxes.map(bbox => bbox.score),
0.5 0.5
) )
statsForScale.nms = Date.now() - ts
statsForScale.numBoxes = indices.length
stats.stage1.push(statsForScale)
return indices.map(boxIdx => boundingBoxes[boxIdx]) return indices.map(boxIdx => boundingBoxes[boxIdx])
}) })
...@@ -119,11 +128,13 @@ export function stage1( ...@@ -119,11 +128,13 @@ export function stage1(
let finalScores: number[] = [] let finalScores: number[] = []
if (allBoxes.length > 0) { if (allBoxes.length > 0) {
let ts = Date.now()
const indices = nms( const indices = nms(
allBoxes.map(bbox => bbox.cell), allBoxes.map(bbox => bbox.cell),
allBoxes.map(bbox => bbox.score), allBoxes.map(bbox => bbox.score),
0.7 0.7
) )
stats.stage1_nms = Date.now() - ts
finalScores = indices.map(idx => allBoxes[idx].score) finalScores = indices.map(idx => allBoxes[idx].score)
finalBoxes = indices finalBoxes = indices
......
...@@ -8,15 +8,26 @@ export async function stage2( ...@@ -8,15 +8,26 @@ export async function stage2(
img: HTMLCanvasElement, img: HTMLCanvasElement,
inputBoxes: BoundingBox[], inputBoxes: BoundingBox[],
scoreThreshold: number, scoreThreshold: number,
params: RNetParams params: RNetParams,
stats: any
) { ) {
const rnetInput = await extractImagePatches(img, inputBoxes, { width: 24, height: 24 }) let ts = Date.now()
const rnetOut = RNet(rnetInput, params) const rnetInputs = await extractImagePatches(img, inputBoxes, { width: 24, height: 24 })
stats.stage2_extractImagePatches = Date.now() - ts
ts = Date.now()
const rnetOuts = rnetInputs.map(
rnetInput => {
const out = RNet(rnetInput, params)
rnetInput.dispose() rnetInput.dispose()
return out
}
)
stats.stage2_rnet = Date.now() - ts
const scores = Array.from(await rnetOut.scores.data()) const scoreDatas = await Promise.all(rnetOuts.map(out => out.scores.data()))
const scores = scoreDatas.map(arr => Array.from(arr)).reduce((all, arr) => all.concat(arr))
const indices = scores const indices = scores
.map((score, idx) => ({ score, idx })) .map((score, idx) => ({ score, idx }))
.filter(c => c.score > scoreThreshold) .filter(c => c.score > scoreThreshold)
...@@ -29,18 +40,20 @@ export async function stage2( ...@@ -29,18 +40,20 @@ export async function stage2(
let finalScores: number[] = [] let finalScores: number[] = []
if (filteredBoxes.length > 0) { if (filteredBoxes.length > 0) {
ts = Date.now()
const indicesNms = nms( const indicesNms = nms(
filteredBoxes, filteredBoxes,
filteredScores, filteredScores,
0.7 0.7
) )
stats.stage2_nms = Date.now() - ts
const regions = indicesNms.map(idx => const regions = indicesNms.map(idx =>
new BoundingBox( new BoundingBox(
rnetOut.regions.get(indices[idx], 0), rnetOuts[indices[idx]].regions.get(0, 0),
rnetOut.regions.get(indices[idx], 1), rnetOuts[indices[idx]].regions.get(0, 1),
rnetOut.regions.get(indices[idx], 2), rnetOuts[indices[idx]].regions.get(0, 2),
rnetOut.regions.get(indices[idx], 3) rnetOuts[indices[idx]].regions.get(0, 3)
) )
) )
...@@ -48,8 +61,10 @@ export async function stage2( ...@@ -48,8 +61,10 @@ export async function stage2(
finalBoxes = indicesNms.map((idx, i) => filteredBoxes[idx].calibrate(regions[i])) finalBoxes = indicesNms.map((idx, i) => filteredBoxes[idx].calibrate(regions[i]))
} }
rnetOut.regions.dispose() rnetOuts.forEach(t => {
rnetOut.scores.dispose() t.regions.dispose()
t.scores.dispose()
})
return { return {
boxes: finalBoxes, boxes: finalBoxes,
......
...@@ -9,25 +9,36 @@ export async function stage3( ...@@ -9,25 +9,36 @@ export async function stage3(
img: HTMLCanvasElement, img: HTMLCanvasElement,
inputBoxes: BoundingBox[], inputBoxes: BoundingBox[],
scoreThreshold: number, scoreThreshold: number,
params: ONetParams params: ONetParams,
stats: any
) { ) {
const onetInput = await extractImagePatches(img, inputBoxes, { width: 48, height: 48 }) let ts = Date.now()
const onetOut = ONet(onetInput, params) const onetInputs = await extractImagePatches(img, inputBoxes, { width: 48, height: 48 })
stats.stage3_extractImagePatches = Date.now() - ts
ts = Date.now()
const onetOuts = onetInputs.map(
onetInput => {
const out = ONet(onetInput, params)
onetInput.dispose() onetInput.dispose()
return out
}
)
stats.stage3_onet = Date.now() - ts
const scores = Array.from(await onetOut.scores.data()) const scoreDatas = await Promise.all(onetOuts.map(out => out.scores.data()))
const scores = scoreDatas.map(arr => Array.from(arr)).reduce((all, arr) => all.concat(arr))
const indices = scores const indices = scores
.map((score, idx) => ({ score, idx })) .map((score, idx) => ({ score, idx }))
.filter(c => c.score > scoreThreshold) .filter(c => c.score > scoreThreshold)
.map(({ idx }) => idx) .map(({ idx }) => idx)
const filteredRegions = indices.map(idx => new BoundingBox( const filteredRegions = indices.map(idx => new BoundingBox(
onetOut.regions.get(idx, 0), onetOuts[idx].regions.get(0, 0),
onetOut.regions.get(idx, 1), onetOuts[idx].regions.get(0, 1),
onetOut.regions.get(idx, 2), onetOuts[idx].regions.get(0, 2),
onetOut.regions.get(idx, 3) onetOuts[idx].regions.get(0, 3)
)) ))
const filteredBoxes = indices const filteredBoxes = indices
.map((idx, i) => inputBoxes[idx].calibrate(filteredRegions[i])) .map((idx, i) => inputBoxes[idx].calibrate(filteredRegions[i]))
...@@ -39,28 +50,32 @@ export async function stage3( ...@@ -39,28 +50,32 @@ export async function stage3(
if (filteredBoxes.length > 0) { if (filteredBoxes.length > 0) {
ts = Date.now()
const indicesNms = nms( const indicesNms = nms(
filteredBoxes, filteredBoxes,
filteredScores, filteredScores,
0.7, 0.7,
false false
) )
stats.stage3_nms = Date.now() - ts
finalBoxes = indicesNms.map(idx => filteredBoxes[idx]) finalBoxes = indicesNms.map(idx => filteredBoxes[idx])
finalScores = indicesNms.map(idx => filteredScores[idx]) finalScores = indicesNms.map(idx => filteredScores[idx])
points = indicesNms.map((idx, i) => points = indicesNms.map((idx, i) =>
Array(5).fill(0).map((_, ptIdx) => Array(5).fill(0).map((_, ptIdx) =>
new Point( new Point(
((onetOut.points.get(idx, ptIdx) * (finalBoxes[i].width + 1)) + finalBoxes[i].left) , ((onetOuts[idx].points.get(0, ptIdx) * (finalBoxes[i].width + 1)) + finalBoxes[i].left) ,
((onetOut.points.get(idx, ptIdx + 5) * (finalBoxes[i].height + 1)) + finalBoxes[i].top) ((onetOuts[idx].points.get(0, ptIdx + 5) * (finalBoxes[i].height + 1)) + finalBoxes[i].top)
) )
) )
) )
} }
onetOut.regions.dispose() onetOuts.forEach(t => {
onetOut.scores.dispose() t.regions.dispose()
onetOut.points.dispose() t.scores.dispose()
t.points.dispose()
})
return { return {
boxes: finalBoxes, boxes: finalBoxes,
......
import { tf } from '..'; import { tf } from '..';
import { ConvParams, FCParams } from '../commons/types'; import { ConvParams, FCParams } from '../commons/types';
import { BoundingBox } from './BoundingBox'; import { FaceDetection } from '../faceDetectionNet/FaceDetection';
import { FaceLandmarks5 } from './FaceLandmarks5';
export type SharedParams = { export type SharedParams = {
conv1: ConvParams conv1: ConvParams
...@@ -38,3 +39,8 @@ export type NetParams = { ...@@ -38,3 +39,8 @@ export type NetParams = {
rnet: RNetParams rnet: RNetParams
onet: ONetParams onet: ONetParams
} }
// Result for a single face produced by the full MTCNN pipeline:
// the scored bounding-box detection (stage 3 output) paired with its
// five facial landmark points (see the Array(5) landmark construction
// in stage3), both expressed relative to the input image dimensions.
export type MtcnnResult = {
  faceDetection: FaceDetection,
  faceLandmarks: FaceLandmarks5
}
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment