implemented stage2

5e957ec7 · vincent · 084cd30a · 5e957ec7 · 5e957ec7 · 5e957ec7
Commit 5e957ec7 authored Jul 10, 2018 by vincent
11 changed files
--- a/src/NetInput.ts
+++ b/src/NetInput.ts
@@ -8,6 +8,7 @@ import { createCanvasFromMedia } from './utils';

 export class NetInput {
  private _inputs: tf.Tensor3D[] = []
+  private _canvases: HTMLCanvasElement[] = []
  private _isManaged: boolean = false
  private _isBatchInput: boolean = false

@@ -16,14 +17,15 @@ export class NetInput {

  constructor(
    inputs: tf.Tensor4D | Array<TResolvedNetInput>,
-    isBatchInput: boolean = false
+    isBatchInput: boolean = false,
+    keepCanvases: boolean = false
  ) {
    if (isTensor4D(inputs)) {
      this._inputs = tf.unstack(inputs as tf.Tensor4D) as tf.Tensor3D[]
    }

    if (Array.isArray(inputs)) {
-      this._inputs = inputs.map(input => {
+      this._inputs = inputs.map((input, idx) => {
        if (isTensor3D(input)) {
          // TODO: make sure not to dispose original tensors passed in by the user
          return tf.clone(input as tf.Tensor3D)
@@ -39,9 +41,11 @@ export class NetInput {
          return (input as tf.Tensor4D).reshape(shape.slice(1) as [number, number, number]) as tf.Tensor3D
        }

-        return tf.fromPixels(
-          input instanceof HTMLCanvasElement ? input : createCanvasFromMedia(input as HTMLImageElement | HTMLVideoElement)
-        )
+        const canvas = input instanceof HTMLCanvasElement ? input : createCanvasFromMedia(input as HTMLImageElement | HTMLVideoElement)
+        if (keepCanvases) {
+          this._canvases[idx] = canvas
+        }
+        return tf.fromPixels(canvas)
      })
    }

@@ -53,6 +57,10 @@ export class NetInput {
    return this._inputs
  }

+  public get canvases(): HTMLCanvasElement[] {
+    return this._canvases
+  }
+
  public get isManaged(): boolean {
    return this._isManaged
  }

--- a/src/mtcnn/BoundingBox.ts
+++ b/src/mtcnn/BoundingBox.ts
@@ -55,4 +55,52 @@ export class BoundingBox {
      Math.round(this.bottom)
    )
  }
+
+  /**
+   * Clips this box to the image bounds and derives the matching patch-local offsets.
+   *
+   * Coordinates are treated as 1-based: `x`/`y` are raised to at least 1 and `ex`/`ey`
+   * are capped at `imageWidth`/`imageHeight`. `dx`/`dy` and `edx`/`edy` give the start
+   * and end of that clipped region inside a patch of `w` x `h` (`width + 1` by
+   * `height + 1`); this appears to mirror the `pad` step of the reference MTCNN code.
+   *
+   * @param imageHeight Height of the image the box is clipped against.
+   * @param imageWidth Width of the image the box is clipped against.
+   * @returns `{ dy, edy, dx, edx, y, ey, x, ex, w, h }` as described above.
+   */
+  public padAtBorders(imageHeight: number, imageWidth: number) {
+    const w = this.width + 1
+    const h = this.height + 1
+
+    let dx = 1
+    let dy = 1
+    let edx = w
+    let edy = h
+
+    let x = this.left
+    let y = this.top
+    let ex = this.right
+    let ey = this.bottom
+
+    if (ex > imageWidth) {
+      edx = -ex + imageWidth + w
+      ex = imageWidth
+    }
+    if (ey > imageHeight) {
+      edy = -ey + imageHeight + h
+      ey = imageHeight
+    }
+    if (x < 1) {
+      // overflow on the left moves the patch start (dx); edy must stay untouched
+      dx = 2 - x
+      x = 1
+    }
+    if (y < 1) {
+      // overflow at the top moves the patch start (dy); edy keeps its value from above
+      dy = 2 - y
+      y = 1
+    }
+
+    return { dy, edy, dx, edx, y, ey, x, ex, w, h }
+  }
 }
\ No newline at end of file
--- a/src/mtcnn/Mtcnn.ts
+++ b/src/mtcnn/Mtcnn.ts
@@ -8,6 +8,7 @@ import { bgrToRgbTensor } from './bgrToRgbTensor';
 import { extractParams } from './extractParams';
 import { pyramidDown } from './pyramidDown';
 import { stage1 } from './stage1';
+import { stage2 } from './stage2';
 import { NetParams } from './types';

 export class Mtcnn extends NeuralNetwork<NetParams> {
@@ -16,12 +17,12 @@ export class Mtcnn extends NeuralNetwork<NetParams> {
    super('Mtcnn')
  }

-  public forwardInput(
+  public async forwardInput(
    input: NetInput,
    minFaceSize: number = 20,
    scaleFactor: number = 0.709,
    scoreThresholds: number[] = [0.6, 0.7, 0.7]
-  ): tf.Tensor2D {
+  ): Promise<tf.Tensor2D> {

    const { params } = this

@@ -29,18 +30,32 @@ export class Mtcnn extends NeuralNetwork<NetParams> {
      throw new Error('Mtcnn - load model before inference')
    }

-    return tf.tidy(() => {
-      // TODO: expects bgr input?
-      let imgTensor = bgrToRgbTensor(
-        tf.expandDims(input.inputs[0]).toFloat() as tf.Tensor4D
+    const inputTensor = input.inputs[0]
+    const inputCanvas = input.canvases[0]
+
+    if (!inputCanvas) {
+      throw new Error('Mtcnn - inputCanvas is not defined, note that passing tensors into Mtcnn.forwardInput is not supported yet.')
+    }
+
+    const imgTensor = tf.tidy(() =>
+      bgrToRgbTensor(
+        tf.expandDims(inputTensor).toFloat() as tf.Tensor4D
      )
+    )
+
+    const scales = pyramidDown(minFaceSize, scaleFactor, imgTensor.shape.slice(1))
+    const out1 = await stage1(imgTensor, scales, scoreThresholds[0], params.pnet)
+
+    // using the inputCanvas to extract and resize the image patches, since it is faster
+    // than doing this on the gpu
+    const out2 = await stage2(inputCanvas, out1, scoreThresholds[1], params.rnet)
+

-      const scales = pyramidDown(minFaceSize, scaleFactor, imgTensor.shape.slice(1))

-      const out1 = stage1(imgTensor, scales, scoreThresholds[0], params.pnet)
+    imgTensor.dispose()
+    input.dispose()

-      return tf.tensor2d([0], [1, 1])
-    })
+    return tf.tensor2d([0], [1, 1])
  }

  public async forward(
@@ -50,7 +65,7 @@ export class Mtcnn extends NeuralNetwork<NetParams> {
    scoreThresholds: number[] = [0.6, 0.7, 0.7]
  ): Promise<tf.Tensor2D> {
    return this.forwardInput(
-      await toNetInput(input, true),
+      await toNetInput(input, true, true),
      minFaceSize,
      scaleFactor,
      scoreThresholds

--- a/src/mtcnn/RNet.ts
+++ b/src/mtcnn/RNet.ts
+import * as tf from '@tensorflow/tfjs-core';
+
+import { fullyConnectedLayer } from '../faceLandmarkNet/fullyConnectedLayer';
+import { prelu } from './prelu';
+import { sharedLayer } from './sharedLayers';
+import { RNetParams } from './types';
+
+/**
+ * Runs the RNet head on a batch of inputs.
+ *
+ * The input goes through `sharedLayer`, is reshaped to two dimensions and passed through
+ * `fc1` followed by PReLU. Two fully connected layers branch off that activation:
+ * `fc2_1`, turned into `prob` by a softmax along axis 1, and `fc2_2`, returned as `regions`.
+ *
+ * @param x Batch of inputs to the network.
+ * @param params Parameters for the shared layers, `fc1`, `prelu4_alpha`, `fc2_1` and `fc2_2`.
+ * @returns The `prob` and `regions` tensors.
+ */
+export function RNet(x: tf.Tensor4D, params: RNetParams): { prob: tf.Tensor2D, regions: tf.Tensor2D } {
+  return tf.tidy(() => {
+    const features = sharedLayer(x, params)
+    const batchSize = features.shape[0]
+    const flattened = tf.reshape(features, [batchSize, params.fc1.weights.shape[0]]) as tf.Tensor2D
+
+    const hidden = prelu<tf.Tensor2D>(fullyConnectedLayer(flattened, params.fc1), params.prelu4_alpha)
+
+    // shift every row by its maximum before applying the softmax
+    const logits = fullyConnectedLayer(hidden, params.fc2_1)
+    const rowMax = tf.expandDims(tf.max(logits, 1), 1)
+    const prob = tf.softmax(tf.sub(logits, rowMax), 1) as tf.Tensor2D
+
+    const regions = fullyConnectedLayer(hidden, params.fc2_2)
+
+    return { prob, regions }
+  })
+}
\ No newline at end of file
--- a/src/mtcnn/bgrToRgbTensor.ts
+++ b/src/mtcnn/bgrToRgbTensor.ts
@@ -2,6 +2,6 @@ import * as tf from '@tensorflow/tfjs-core';

 export function bgrToRgbTensor(tensor: tf.Tensor4D): tf.Tensor4D {
  return tf.tidy(
-    () => tf.stack(tf.unstack(tensor, 3), 3)
+    () => tf.stack(tf.unstack(tensor, 3).reverse(), 3)
  ) as tf.Tensor4D
 }
\ No newline at end of file
--- a/src/mtcnn/normalize.ts
+++ b/src/mtcnn/normalize.ts
+import * as tf from '@tensorflow/tfjs-core';
+
+/**
+ * Shifts and scales a tensor: subtracts 127.5, then multiplies by 0.0078125 (= 1 / 128).
+ *
+ * NOTE(review): presumably meant for 0..255 pixel values - confirm against callers.
+ *
+ * @param x Tensor to normalize.
+ * @returns A new tensor holding `(x - 127.5) * 0.0078125`.
+ */
+export function normalize(x: tf.Tensor4D): tf.Tensor4D {
+  return tf.tidy(() => {
+    const centered = tf.sub(x, tf.scalar(127.5))
+    return tf.mul(centered, tf.scalar(0.0078125))
+  })
+}
\ No newline at end of file
--- a/src/mtcnn/prelu.ts
+++ b/src/mtcnn/prelu.ts
 import * as tf from '@tensorflow/tfjs-core';

-export function prelu(x: tf.Tensor4D, alpha: tf.Tensor1D): tf.Tensor4D {
+export function prelu<T extends tf.Tensor>(x: T, alpha: tf.Tensor1D): T {
  return tf.tidy(() =>
    tf.add(
      tf.relu(x),

--- a/src/mtcnn/sharedLayers.ts
+++ b/src/mtcnn/sharedLayers.ts
@@ -8,13 +8,13 @@ export function sharedLayer(x: tf.Tensor4D, params: SharedParams, isPnet: boolea
  return tf.tidy(() => {

    let out = convLayer(x, params.conv1, 'valid')
-    out = prelu(out, params.prelu1_alpha)
+    out = prelu<tf.Tensor4D>(out, params.prelu1_alpha)
    out = tf.maxPool(out, isPnet ? [2, 2]: [3, 3], [2, 2], 'same')
    out = convLayer(out, params.conv2, 'valid')
-    out = prelu(out, params.prelu2_alpha)
+    out = prelu<tf.Tensor4D>(out, params.prelu2_alpha)
    out = isPnet ? out : tf.maxPool(out, [3, 3], [2, 2], 'valid')
    out = convLayer(out, params.conv3, 'valid')
-    out = prelu(out, params.prelu3_alpha)
+    out = prelu<tf.Tensor4D>(out, params.prelu3_alpha)

    return out
  })

--- a/src/mtcnn/stage1.ts
+++ b/src/mtcnn/stage1.ts
@@ -79,13 +79,12 @@ export function stage1(
      const { prob, regions } = PNet(resized, params)


-      const scores = tf.unstack(prob, 3)[1]
-      const [sh, sw] = scores.shape.slice(1)
-      const [rh, rw] = regions.shape.slice(1)
+      const scoresTensor = tf.unstack(tf.unstack(prob, 3)[1])[0] as tf.Tensor2D
+      const regionsTensor = tf.unstack(regions)[0] as tf.Tensor3D

      return {
-        scoresTensor: scores.as2D(sh, sw),
-        regionsTensor: regions.as3D(rh, rw, 4)
+        scoresTensor,
+        regionsTensor
      }
    })


--- a/src/mtcnn/stage2.ts
+++ b/src/mtcnn/stage2.ts
+import * as tf from '@tensorflow/tfjs-core';
+
+import { createCanvas, getContext2dOrThrow } from '../utils';
+import { bgrToRgbTensor } from './bgrToRgbTensor';
+import { BoundingBox } from './BoundingBox';
+import { nms } from './nms';
+import { normalize } from './normalize';
+import { RNet } from './RNet';
+import { RNetParams } from './types';
+
+/**
+ * Second MTCNN stage: re-scores and adjusts candidate boxes with RNet.
+ *
+ * Every candidate box is read from `img`, drawn onto a 24x24 canvas and pushed through
+ * `RNet` in a single batch. Candidates whose score exceeds `scoreThreshold` are reduced
+ * with `nms` (threshold 0.7); the survivors are shifted by the offsets RNet returned
+ * for them, then squared and rounded.
+ *
+ * @param img Canvas the image patches are read from.
+ * @param boxes Candidate boxes to refine.
+ * @param scoreThreshold Score a candidate has to exceed (exclusive) to be kept.
+ * @param params Parameters forwarded to `RNet`.
+ * @returns `finalBoxes` and their `finalScores`; both are empty when nothing survives.
+ */
+export async function stage2(
+  img: HTMLCanvasElement,
+  boxes: { box: BoundingBox, score: number }[],
+  scoreThreshold: number,
+  params: RNetParams
+) {
+
+  let finalBoxes: BoundingBox[] = []
+  let finalScores: number[] = []
+
+  // nothing to refine: skip the canvas work and avoid a tensor with an empty batch dimension
+  if (boxes.length === 0) {
+    return {
+      finalBoxes,
+      finalScores
+    }
+  }
+
+  const { height, width } = img
+
+  const imgCtx = getContext2dOrThrow(img)
+
+  const bitmaps = await Promise.all(boxes.map(async ({ box }) => {
+    // TODO: correct padding
+    const { y, ey, x, ex } = box.padAtBorders(height, width)
+
+    const fromX = x - 1
+    const fromY = y - 1
+    const imgData = imgCtx.getImageData(fromX, fromY, (ex - fromX), (ey - fromY))
+
+    return createImageBitmap(imgData)
+  }))
+
+  const imagePatchesData: number[] = []
+
+  bitmaps.forEach(bmp => {
+    const patch = createCanvas({ width: 24, height: 24 })
+    const patchCtx = getContext2dOrThrow(patch)
+    patchCtx.drawImage(bmp, 0, 0, 24, 24)
+    // the bitmap is not needed once drawn; release it instead of waiting for GC
+    bmp.close()
+    const { data } = patchCtx.getImageData(0, 0, 24, 24)
+
+    for (let i = 0; i < data.length; i++) {
+      // skip every 4th value (the alpha component of the RGBA pixel data)
+      if ((i + 1) % 4 === 0) continue
+      imagePatchesData.push(data[i])
+    }
+  })
+
+  const rnetOut = tf.tidy(() => {
+    const imagePatchTensor = bgrToRgbTensor(tf.transpose(
+      tf.tensor4d(imagePatchesData, [boxes.length, 24, 24, 3]),
+      [0, 2, 1, 3]
+    ).toFloat()) as tf.Tensor4D
+
+    const normalized = normalize(imagePatchTensor)
+
+    const { prob, regions } = RNet(normalized, params)
+    return {
+      scores: tf.unstack(prob, 1)[1],
+      regions
+    }
+  })
+
+  try {
+    const scores = Array.from(await rnetOut.scores.data())
+
+    // positions (into `boxes`) of the candidates that pass the score threshold
+    const indices = scores
+      .map((score, idx) => ({ score, idx }))
+      .filter(c => c.score > scoreThreshold)
+      .map(({ idx }) => idx)
+
+    const filteredBoxes = indices.map(idx => boxes[idx].box)
+    const filteredScores = indices.map(idx => scores[idx])
+
+    if (filteredBoxes.length > 0) {
+      const indicesNms = nms(
+        filteredBoxes,
+        filteredScores,
+        0.7
+      )
+
+      // read all offsets back once instead of four synchronous `get` calls per box
+      const regionsData = await rnetOut.regions.data()
+      const valuesPerRow = rnetOut.regions.shape[1]
+
+      finalScores = indicesNms.map(idx => filteredScores[idx])
+      finalBoxes = indicesNms
+        .map(idx => {
+          const box = filteredBoxes[idx]
+          // `idx` addresses the filtered lists; `indices[idx]` is the row in the RNet output
+          const rowStart = indices[idx] * valuesPerRow
+          const rleft = regionsData[rowStart]
+          const rtop = regionsData[rowStart + 1]
+          const right = regionsData[rowStart + 2]
+          const rbottom = regionsData[rowStart + 3]
+
+          return new BoundingBox(
+            box.left + (rleft * box.width),
+            box.top + (rtop * box.height),
+            box.right + (right * box.width),
+            box.bottom + (rbottom * box.height)
+          ).toSquare().round()
+        })
+    }
+  } finally {
+    // free the RNet outputs even when reading them back or nms fails
+    rnetOut.regions.dispose()
+    rnetOut.scores.dispose()
+  }
+
+  return {
+    finalBoxes,
+    finalScores
+  }
+}
\ No newline at end of file
--- a/src/toNetInput.ts
+++ b/src/toNetInput.ts
@@ -17,7 +17,8 @@ import { awaitMediaLoaded, resolveInput } from './utils';
 */
 export async function toNetInput(
  inputs: TNetInput,
-  manageCreatedInput: boolean = false
+  manageCreatedInput: boolean = false,
+  keepCanvases: boolean = false
 ): Promise<NetInput> {
  if (inputs instanceof NetInput) {
    return inputs
@@ -67,5 +68,5 @@ export async function toNetInput(
    inputArray.map(input => isMediaElement(input) && awaitMediaLoaded(input))
  )

-  return afterCreate(new NetInput(inputArray, Array.isArray(inputs)))
+  return afterCreate(new NetInput(inputArray, Array.isArray(inputs), keepCanvases))
 }
\ No newline at end of file