implemented mobilenetv1 for face detector

2efff4f7 · vincent · 6f8221a8 · 2efff4f7 · 2efff4f7 · 2efff4f7
Commit 2efff4f7 authored Jun 04, 2018 by vincent
7 changed files
--- a/src/faceDetectionNet/extractParams.ts
+++ b/src/faceDetectionNet/extractParams.ts
@@ -5,27 +5,27 @@ import { FaceDetectionNet } from './types';
 function mobilenetV1WeightsExtractorsFactory(extractWeights: (numWeights: number) => Float32Array) {
  function extractDepthwiseConvParams(numChannels: number): FaceDetectionNet.MobileNetV1.DepthwiseConvParams {
-    const weights = tf.tensor4d(extractWeights(3 * 3 * numChannels), [3, 3, numChannels, 1])
+    const filters = tf.tensor4d(extractWeights(3 * 3 * numChannels), [3, 3, numChannels, 1])
-    const batch_norm_gamma = tf.tensor1d(extractWeights(numChannels))
+    const batch_norm_scale = tf.tensor1d(extractWeights(numChannels))
-    const batch_norm_beta = tf.tensor1d(extractWeights(numChannels))
+    const batch_norm_offset = tf.tensor1d(extractWeights(numChannels))
    const batch_norm_mean = tf.tensor1d(extractWeights(numChannels))
    const batch_norm_variance = tf.tensor1d(extractWeights(numChannels))
    return {
-      weights,
+      filters,
-      batch_norm_gamma,
+      batch_norm_scale,
-      batch_norm_beta,
+      batch_norm_offset,
      batch_norm_mean,
      batch_norm_variance
    }
  }
  function extractPointwiseConvParams(channelsIn: number, channelsOut: number): FaceDetectionNet.MobileNetV1.PointwiseConvParams {
-    const weights = tf.tensor4d(extractWeights(channelsIn * channelsOut), [1, 1, channelsIn, channelsOut])
+    const filters = tf.tensor4d(extractWeights(channelsIn * channelsOut), [1, 1, channelsIn, channelsOut])
    const batch_norm_offset = tf.tensor1d(extractWeights(channelsOut))
    return {
-      weights,
+      filters,
      batch_norm_offset
    }
  }
@@ -59,7 +59,7 @@ function extractorsFactory(extractWeights: (numWeights: number) => Float32Array)
  function extractMobilenetV1Params(): FaceDetectionNet.MobileNetV1.Params {
    const conv_0_params = {
-      weights: tf.tensor4d(extractWeights(3 * 3 * 3 * 32), [3, 3, 3, 32]),
+      filters: tf.tensor4d(extractWeights(3 * 3 * 3 * 32), [3, 3, 3, 32]),
      batch_norm_offset: tf.tensor1d(extractWeights(32))
    }

--- a/src/faceDetectionNet/index.ts
+++ b/src/faceDetectionNet/index.ts
 import * as tf from '@tensorflow/tfjs-core';
-import { resizeLayer } from './resizeLayer';
+import { isFloat } from '../utils';
 import { extractParams } from './extractParams';
 import { mobileNetV1 } from './mobileNetV1';
+import { resizeLayer } from './resizeLayer';
+/**
+ * Builds a single-image tensor from a flat array of pixel values.
+ *
+ * The array length must equal dim * dim * 3 for some positive integer dim;
+ * the values are then interpreted with shape [1, dim, dim, 3].
+ *
+ * @param input Flat array of pixel values for one square, 3-channel image.
+ * @returns A tensor of shape [1, dim, dim, 3].
+ * @throws Error if the array length does not describe a non-empty square 3-channel image.
+ */
+function fromData(input: number[]): tf.Tensor4D {
+  const pxPerChannel = input.length / 3
+  const dim = Math.sqrt(pxPerChannel)
+  // A fractional side length means the data is not square (or not 3-channel);
+  // a side length below 1 means there is no pixel data at all.
+  if (isFloat(dim) || dim < 1) {
+    throw new Error(`invalid input size: ${dim}x${dim}x3 (array length: ${input.length})`)
+  }
+  // Use the validated side length instead of a hard-coded 580, so every
+  // input that passes the check above is accepted.
+  return tf.tensor4d(input, [1, dim, dim, 3])
+}
+/**
+ * Converts a list of ImageData objects into one float32 tensor by turning
+ * each image into a tensor, adding a leading axis and concatenating along it.
+ *
+ * @param input The images to convert.
+ * @returns The concatenated tensor, cast to 'float32'.
+ * @throws Error naming the first index whose entry is not an ImageData instance.
+ */
+function fromImageData(input: ImageData[]) {
+  // Reject the first entry that is not an ImageData before creating any tensor.
+  for (let i = 0; i < input.length; i++) {
+    if (!(input[i] instanceof ImageData)) {
+      throw new Error(`expected input at index ${i} to be instanceof ImageData`)
+    }
+  }
+  const batched = input.map(
+    imgData => tf.expandDims(tf.fromPixels(imgData), 0)
+  ) as tf.Tensor4D[]
+  const stacked = tf.concat(batched, 0)
+  return tf.cast(stacked, 'float32')
+}
 export function faceDetectionNet(weights: Float32Array) {
  const params = extractParams(weights)
-  async function forward(input: ImageData|ImageData[]) {
+  async function forward(input: ImageData|ImageData[]|number[]) {
+    return tf.tidy(() => {
-    const imgTensors = (input instanceof ImageData ? [input] : input)
+      const imgDataArray = input instanceof ImageData
-      .map(data => tf.fromPixels(data))
+        ? [input]
-      .map(data => tf.expandDims(data, 0)) as tf.Tensor4D[]
+        : (
+          input[0] instanceof ImageData
+            ? input as ImageData[]
+            : null
+        )
-    const imgTensor = tf.cast(tf.concat(imgTensors, 0), 'float32')
+      const imgTensor = imgDataArray !== null
+        ? fromImageData(imgDataArray)
+        : fromData(input as number[])
-    let out = resizeLayer(imgTensor) as tf.Tensor4D
+      let out = resizeLayer(imgTensor) as tf.Tensor4D
+      out = mobileNetV1(out, params.mobilenetv1_params)
-    out = mobileNetV1(out, params.mobilenetv1_params)
+      return out
-    return out
+    })
  }
  return {

--- a/src/faceDetectionNet/mobileNetV1.ts
+++ b/src/faceDetectionNet/mobileNetV1.ts
@@ -2,6 +2,57 @@ import * as tf from '@tensorflow/tfjs-core';
 import { FaceDetectionNet } from './types';
+// Variance epsilon handed to tf.batchNormalization in depthwiseConvLayer
+// (numerically 0.001 rounded to single precision).
+const epsilon = 0.0010000000474974513
+/**
+ * Applies a depthwise convolution with 'same' padding, then batch
+ * normalization, then ReLU, all inside a tf.tidy scope.
+ *
+ * @param x Input tensor for the convolution.
+ * @param params Depthwise filters plus batch norm mean, variance, scale and offset.
+ * @param strides Strides forwarded to tf.depthwiseConv2d.
+ * @returns The ReLU-activated output tensor.
+ */
+function depthwiseConvLayer(
+  x: tf.Tensor4D,
+  params: FaceDetectionNet.MobileNetV1.DepthwiseConvParams,
+  strides: [number, number]
+) {
+  return tf.tidy(() => {
+    const {
+      filters,
+      batch_norm_mean: mean,
+      batch_norm_variance: variance,
+      batch_norm_scale: scale,
+      batch_norm_offset: offset
+    } = params
+    const convolved = tf.depthwiseConv2d(x, filters, strides, 'same')
+    const normalized = tf.batchNormalization<tf.Rank.R4>(
+      convolved,
+      mean,
+      variance,
+      epsilon,
+      scale,
+      offset
+    )
+    return tf.relu(normalized)
+  })
+}
+/**
+ * Applies a convolution with 'same' padding, adds the offset stored in
+ * params.batch_norm_offset, then applies ReLU, all inside a tf.tidy scope.
+ *
+ * @param x Input tensor for the convolution.
+ * @param params Convolution filters and the offset added after the convolution.
+ * @param strides Strides forwarded to tf.conv2d.
+ * @returns The ReLU-activated output tensor.
+ */
+function pointwiseConvLayer(
+  x: tf.Tensor4D,
+  params: FaceDetectionNet.MobileNetV1.PointwiseConvParams,
+  strides: [number, number]
+) {
+  return tf.tidy(() => {
+    const convolved = tf.conv2d(x, params.filters, strides, 'same')
+    // Annotated so the sum keeps the 4D tensor type of the convolution output.
+    const shifted: tf.Tensor4D = tf.add(convolved, params.batch_norm_offset)
+    return tf.relu(shifted)
+  })
+}
+/**
+ * Picks the strides for a given layer index.
+ *
+ * @param layerIdx Layer index to look up.
+ * @returns [2, 2] for layer indices 2, 4, 6 and 12; [1, 1] for every other index.
+ */
+function getStridesForLayerIdx(layerIdx: number): [number, number] {
+  switch (layerIdx) {
+    case 2:
+    case 4:
+    case 6:
+    case 12:
+      return [2, 2]
+    default:
+      return [1, 1]
+  }
+}
 export function mobileNetV1(x: tf.Tensor4D, params: FaceDetectionNet.MobileNetV1.Params) {
-  return x
+  return tf.tidy(() => {
+    let out = pointwiseConvLayer(x, params.conv_0_params, [2, 2])
+    params.conv_pair_params.forEach((param, i) => {
+      const depthwiseConvStrides = getStridesForLayerIdx(i + 1)
+      out = depthwiseConvLayer(out, param.depthwise_conv_params, depthwiseConvStrides)
+      out = pointwiseConvLayer(out, param.pointwise_conv_params, [1, 1])
+    })
+    return out
+  })
 }
\ No newline at end of file
--- a/src/faceDetectionNet/resizeLayer.ts
+++ b/src/faceDetectionNet/resizeLayer.ts
 import * as tf from '@tensorflow/tfjs-core';
-// TODO: hardcoded params
 const resizedImageSize = [512, 512] as [number, number]
 const weight = tf.scalar(0.007843137718737125)
 const bias = tf.scalar(1)
 export function resizeLayer(x: tf.Tensor4D) {
-  const resized = tf.image.resizeBilinear(x, resizedImageSize, false)
+  return tf.tidy(() => {
-  return tf.sub(tf.mul(resized, weight), bias)
+    const resized = tf.image.resizeBilinear(x, resizedImageSize, false)
+    return tf.sub(tf.mul(resized, weight), bias)
+  })
 }
\ No newline at end of file
--- a/src/faceDetectionNet/types.ts
+++ b/src/faceDetectionNet/types.ts
@@ -5,15 +5,15 @@ export namespace FaceDetectionNet {
  export namespace MobileNetV1 {
    export type DepthwiseConvParams = {
-      weights: tf.Tensor4D // [3, 3, ch, 1]
+      filters: tf.Tensor4D
-      batch_norm_gamma: tf.Tensor1D
+      batch_norm_scale: tf.Tensor1D
-      batch_norm_beta: tf.Tensor1D
+      batch_norm_offset: tf.Tensor1D
      batch_norm_mean: tf.Tensor1D
      batch_norm_variance: tf.Tensor1D
    }
    export type PointwiseConvParams = {
-      weights: tf.Tensor4D // [1, 1, ch_in, ch_out]
+      filters: tf.Tensor4D
      batch_norm_offset: tf.Tensor1D
    }

--- a/src/faceRecognitionNet/extractParams.ts
+++ b/src/faceRecognitionNet/extractParams.ts
 import * as tf from '@tensorflow/tfjs-core';
+import { isFloat } from '../utils';
 import { FaceRecognitionNet } from './types';
-function isFloat(num: number) {
-  return num % 1 !== 0
-}
 function extractorsFactory(extractWeights: (numWeights: number) => Float32Array) {
  function extractFilterValues(numFilterValues: number, numFilters: number, filterSize: number): tf.Tensor4D {

--- a/src/utils.ts
+++ b/src/utils.ts
+/**
+ * Tells whether a number leaves a non-zero remainder when divided by 1.
+ * NaN and +/-Infinity also yield true, because their remainder is NaN.
+ *
+ * @param num The number to check.
+ * @returns false for integers (including 0 and negative integers), true otherwise.
+ */
+export function isFloat(num: number) {
+  const remainder = num % 1
+  return remainder !== 0
+}