finalize architecture

45c9d6f8 · vincent · 23d4664d · 45c9d6f8 · 45c9d6f8 · 45c9d6f8
Commit 45c9d6f8 authored Jun 05, 2018 by vincent
Show whitespace changes
Inline Side-by-side

Showing with 82 additions and 22 deletions

index.ts src/faceDetectionNet/index.ts +57 -10

outputLayer.ts src/faceDetectionNet/outputLayer.ts +21 -11

index.ts src/index.ts +4 -1

No files found.
--- a/src/faceDetectionNet/index.ts
+++ b/src/faceDetectionNet/index.ts
@@ -15,7 +15,7 @@ function fromData(input: number[]): tf.Tensor4D {
    throw new Error(`invalid input size: ${dim}x${dim}x3 (array length: ${input.length})`)
  }

-  return tf.tensor4d(input as number[], [1, 580, 580, 3])
+  return tf.tensor4d(input as number[], [1, dim, dim, 3])
 }

 function fromImageData(input: ImageData[]) {
@@ -31,10 +31,7 @@ function fromImageData(input: ImageData[]) {
  return tf.cast(tf.concat(imgTensors, 0), 'float32')
 }

-export function faceDetectionNet(weights: Float32Array) {
-  const params = extractParams(weights)
-
-  async function forward(input: ImageData|ImageData[]|number[]) {
+function getImgTensor(input: ImageData|ImageData[]|number[]) {
  return tf.tidy(() => {

    const imgDataArray = input instanceof ImageData
@@ -45,10 +42,19 @@ export function faceDetectionNet(weights: Float32Array) {
          : null
      )

-      const imgTensor = imgDataArray !== null
+    return imgDataArray !== null
      ? fromImageData(imgDataArray)
      : fromData(input as number[])

+  })
+}
+
+export function faceDetectionNet(weights: Float32Array) {
+  const params = extractParams(weights)
+
+  function forwardTensor(imgTensor: tf.Tensor4D) {
+    return tf.tidy(() => {
+
      const resized = resizeLayer(imgTensor) as tf.Tensor4D
      const features = mobileNetV1(resized, params.mobilenetv1_params)

@@ -57,14 +63,54 @@ export function faceDetectionNet(weights: Float32Array) {
        classPredictions
      } = predictionLayer(features.out, features.conv11, params.prediction_layer_params)

-      const decoded = outputLayer(boxPredictions, classPredictions, params.output_layer_params)
+      return outputLayer(boxPredictions, classPredictions, params.output_layer_params)
+    })
+  }

-      return decoded
+  // TODO debug output
+  function forward(input: ImageData|ImageData[]|number[]) {
+    return tf.tidy(
+      () => forwardTensor(getImgTensor(input))
+    )
+  }
+
+  async function locateFaces(
+    input: ImageData|ImageData[]|number[],
+    minConfidence: number = 0.8
+  ) {
+    const imgTensor = getImgTensor(input)
+
+    const [_, height, width] = imgTensor.shape
+
+    const {
+      boxes: _boxes,
+      scores: _scores
+    } = forwardTensor(imgTensor)
+
+    // TODO batches
+    const boxes = _boxes[0]
+    const scores = _scores[0]
+
+    // TODO find a better way to filter by minConfidence
+    const data = await scores.data()
+
+    return Array.from(data)
+      .map((score, idx) => ({ score, idx }))
+      .filter(({ score }) => minConfidence < score)
+      .map(({ score, idx }) => ({
+        score,
+        box: {
+          left: Math.max(0, width * boxes.get(idx, 0)),
+          right: Math.min(width, width * boxes.get(idx, 1)),
+          top: Math.max(0, height * boxes.get(idx, 2)),
+          bottom: Math.min(height, height * boxes.get(idx, 3))
+        }
+      }))

-    })
  }

  return {
-    forward
+    forward,
+    locateFaces
  }
 }
\ No newline at end of file
--- a/src/faceDetectionNet/outputLayer.ts
+++ b/src/faceDetectionNet/outputLayer.ts
@@ -2,12 +2,6 @@ import * as tf from '@tensorflow/tfjs-core';

 import { FaceDetectionNet } from './types';

-
-function batchMultiClassNonMaxSuppressionLayer(x0: tf.Tensor2D, x1: tf.Tensor2D) {
-  // TODO
-  return x0
-}
-
 function getCenterCoordinatesAndSizesLayer(x: tf.Tensor2D) {
  const vec = tf.unstack(tf.transpose(x, [1, 0]))

@@ -27,7 +21,7 @@ function getCenterCoordinatesAndSizesLayer(x: tf.Tensor2D) {
  }
 }

-function decodeLayer(x0: tf.Tensor2D, x1: tf.Tensor2D) {
+function decodeBoxesLayer(x0: tf.Tensor2D, x1: tf.Tensor2D) {
  const {
    sizes,
    centers
@@ -61,15 +55,30 @@ export function outputLayer(

    const batchSize = boxPredictions.shape[0]

-    const decoded = decodeLayer(
+    let boxes = decodeBoxesLayer(
      tf.reshape(tf.tile(params.extra_dim, [batchSize, 1, 1]), [-1, 4]) as tf.Tensor2D,
      tf.reshape(boxPredictions, [-1, 4]) as tf.Tensor2D
    )
+    boxes = tf.reshape(
+      boxes,
+      [batchSize, (boxes.shape[0] / batchSize), 4]
+    )

-    const in1 = tf.sigmoid(tf.slice(classPredictions, [0, 0, 1], [-1, -1, -1]))
-    const in2 = tf.expandDims(tf.reshape(decoded, [batchSize, 5118, 4]), 2)
+    const scoresAndClasses = tf.sigmoid(tf.slice(classPredictions, [0, 0, 1], [-1, -1, -1]))
+    let scores = tf.slice(scoresAndClasses, [0, 0, 0], [-1, -1, 1]) as tf.Tensor

-    return decoded
+    scores = tf.reshape(
+      scores,
+      [batchSize, scores.shape[1]]
+    )
+
+    const boxesByBatch = tf.unstack(boxes) as tf.Tensor2D[]
+    const scoresByBatch = tf.unstack(scores) as tf.Tensor1D[]
+
+    return {
+      boxes: boxesByBatch,
+      scores: scoresByBatch
+    }

  })
 }
\ No newline at end of file
--- a/src/index.ts
+++ b/src/index.ts
@@ -2,10 +2,12 @@ import { euclideanDistance } from './euclideanDistance';
 import { faceDetectionNet } from './faceDetectionNet';
 import { faceRecognitionNet } from './faceRecognitionNet';
 import { normalize } from './normalize';
+import * as tf from '@tensorflow/tfjs-core';

 export {
  euclideanDistance,
  faceDetectionNet,
  faceRecognitionNet,
-  normalize
+  normalize,
+  tf
 }
\ No newline at end of file