finalize architecture

45c9d6f8 · vincent · 23d4664d · 45c9d6f8 · 45c9d6f8 · 45c9d6f8
Commit 45c9d6f8 authored Jun 05, 2018 by vincent
Hide whitespace changes
Inline Side-by-side

Showing with 90 additions and 30 deletions

index.ts src/faceDetectionNet/index.ts +65 -18

outputLayer.ts src/faceDetectionNet/outputLayer.ts +21 -11

index.ts src/index.ts +4 -1

No files found.
--- a/src/faceDetectionNet/index.ts
+++ b/src/faceDetectionNet/index.ts
@@ -15,7 +15,7 @@ function fromData(input: number[]): tf.Tensor4D {
    throw new Error(`invalid input size: ${dim}x${dim}x3 (array length: ${input.length})`)
  }
-  return tf.tensor4d(input as number[], [1, 580, 580, 3])
+  return tf.tensor4d(input as number[], [1, dim, dim, 3])
 }
 function fromImageData(input: ImageData[]) {
@@ -31,24 +31,30 @@ function fromImageData(input: ImageData[]) {
  return tf.cast(tf.concat(imgTensors, 0), 'float32')
 }
+function getImgTensor(input: ImageData|ImageData[]|number[]) {
+  return tf.tidy(() => {
+    const imgDataArray = input instanceof ImageData
+      ? [input]
+      : (
+        input[0] instanceof ImageData
+          ? input as ImageData[]
+          : null
+      )
+    return imgDataArray !== null
+      ? fromImageData(imgDataArray)
+      : fromData(input as number[])
+  })
+}
 export function faceDetectionNet(weights: Float32Array) {
  const params = extractParams(weights)
-  async function forward(input: ImageData|ImageData[]|number[]) {
+  function forwardTensor(imgTensor: tf.Tensor4D) {
    return tf.tidy(() => {
-      const imgDataArray = input instanceof ImageData
-        ? [input]
-        : (
-          input[0] instanceof ImageData
-            ? input as ImageData[]
-            : null
-        )
-      const imgTensor = imgDataArray !== null
-        ? fromImageData(imgDataArray)
-        : fromData(input as number[])
      const resized = resizeLayer(imgTensor) as tf.Tensor4D
      const features = mobileNetV1(resized, params.mobilenetv1_params)
@@ -57,14 +63,54 @@ export function faceDetectionNet(weights: Float32Array) {
        classPredictions
      } = predictionLayer(features.out, features.conv11, params.prediction_layer_params)
-      const decoded = outputLayer(boxPredictions, classPredictions, params.output_layer_params)
+      return outputLayer(boxPredictions, classPredictions, params.output_layer_params)
+    })
+  }
+  // TODO debug output
+  function forward(input: ImageData|ImageData[]|number[]) {
+    return tf.tidy(
+      () => forwardTensor(getImgTensor(input))
+    )
+  }
-      return decoded
+  async function locateFaces(
+    input: ImageData|ImageData[]|number[],
+    minConfidence: number = 0.8
+  ) {
+    const imgTensor = getImgTensor(input)
+    const [_, height, width] = imgTensor.shape
+    const {
+      boxes: _boxes,
+      scores: _scores
+    } = forwardTensor(imgTensor)
+    // TODO: support batches - for now only the first image's boxes and scores (index 0) are used
+    const boxes = _boxes[0]
+    const scores = _scores[0]
+    // TODO find a better way to filter by minConfidence
+    const data = await scores.data()
+    return Array.from(data)
+      .map((score, idx) => ({ score, idx }))
+      .filter(({ score }) => minConfidence < score)
+      .map(({ score, idx }) => ({
+        score,
+        box: {
+          left: Math.max(0, width * boxes.get(idx, 0)),
+          right: Math.min(width, width * boxes.get(idx, 1)),
+          top: Math.max(0, height * boxes.get(idx, 2)),
+          bottom: Math.min(height, height * boxes.get(idx, 3))
+        }
+      }))
-    })
  }
  return {
-    forward
+    forward,
+    locateFaces
  }
 }
\ No newline at end of file
--- a/src/faceDetectionNet/outputLayer.ts
+++ b/src/faceDetectionNet/outputLayer.ts
@@ -2,12 +2,6 @@ import * as tf from '@tensorflow/tfjs-core';
 import { FaceDetectionNet } from './types';
-function batchMultiClassNonMaxSuppressionLayer(x0: tf.Tensor2D, x1: tf.Tensor2D) {
-  // TODO
-  return x0
-}
 function getCenterCoordinatesAndSizesLayer(x: tf.Tensor2D) {
  const vec = tf.unstack(tf.transpose(x, [1, 0]))
@@ -27,7 +21,7 @@ function getCenterCoordinatesAndSizesLayer(x: tf.Tensor2D) {
  }
 }
-function decodeLayer(x0: tf.Tensor2D, x1: tf.Tensor2D) {
+function decodeBoxesLayer(x0: tf.Tensor2D, x1: tf.Tensor2D) {
  const {
    sizes,
    centers
@@ -61,15 +55,30 @@ export function outputLayer(
    const batchSize = boxPredictions.shape[0]
-    const decoded = decodeLayer(
+    let boxes = decodeBoxesLayer(
      tf.reshape(tf.tile(params.extra_dim, [batchSize, 1, 1]), [-1, 4]) as tf.Tensor2D,
      tf.reshape(boxPredictions, [-1, 4]) as tf.Tensor2D
    )
+    boxes = tf.reshape(
+      boxes,
+      [batchSize, (boxes.shape[0] / batchSize), 4]
+    )
+    const scoresAndClasses = tf.sigmoid(tf.slice(classPredictions, [0, 0, 1], [-1, -1, -1]))
+    let scores = tf.slice(scoresAndClasses, [0, 0, 0], [-1, -1, 1]) as tf.Tensor
+    scores = tf.reshape(
+      scores,
+      [batchSize, scores.shape[1]]
+    )
-    const in1 = tf.sigmoid(tf.slice(classPredictions, [0, 0, 1], [-1, -1, -1]))
+    const boxesByBatch = tf.unstack(boxes) as tf.Tensor2D[]
-    const in2 = tf.expandDims(tf.reshape(decoded, [batchSize, 5118, 4]), 2)
+    const scoresByBatch = tf.unstack(scores) as tf.Tensor1D[]
-    return decoded
+    return {
+      boxes: boxesByBatch,
+      scores: scoresByBatch
+    }
  })
 }
\ No newline at end of file
--- a/src/index.ts
+++ b/src/index.ts
@@ -2,10 +2,12 @@ import { euclideanDistance } from './euclideanDistance';
 import { faceDetectionNet } from './faceDetectionNet';
 import { faceRecognitionNet } from './faceRecognitionNet';
 import { normalize } from './normalize';
+import * as tf from '@tensorflow/tfjs-core';
 export {
  euclideanDistance,
  faceDetectionNet,
  faceRecognitionNet,
-  normalize
+  normalize,
+  tf
 }
\ No newline at end of file