Commit 554bbe05 by vincent

pad input of face landmark net to square and center to avoid stretching of non square images

parent fd698812
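Note: the padToSquare helper that the landmark net now calls is imported below, but its implementation is not part of this diff. As a rough, hypothetical sketch (the exact signature, the zero-value padding and the meaning of the second argument are assumptions inferred only from how the helper is called here), centered padding of a batched image tensor to a square could look like this:

import * as tf from '@tensorflow/tfjs-core';

// Hypothetical sketch only, not the implementation shipped with this commit:
// pad a [batch, height, width, channels] tensor with zeros along its shorter
// dimension until it is square; when isCenterImage is true, split the padding
// evenly before and after so the image content stays centered.
export function padToSquare(imgTensor: tf.Tensor4D, isCenterImage: boolean = false): tf.Tensor4D {
  return tf.tidy(() => {
    const [, height, width] = imgTensor.shape
    if (height === width) {
      return imgTensor
    }
    const diff = Math.abs(height - width)
    const before = isCenterImage ? Math.floor(diff / 2) : 0
    const after = diff - before
    // pad rows if the image is wider than tall, otherwise pad columns
    const paddings: Array<[number, number]> = height < width
      ? [[0, 0], [before, after], [0, 0], [0, 0]]
      : [[0, 0], [0, 0], [before, after], [0, 0]]
    return tf.pad(imgTensor, paddings)
  })
}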
@@ -18,6 +18,20 @@ export class Rect implements IRect {
     this.height = height
   }
+  public toSquare(): Rect {
+    let { x, y, width, height } = this
+    const diff = Math.abs(width - height)
+    if (width < height) {
+      x -= (diff / 2)
+      width += diff
+    }
+    if (height < width) {
+      y -= (diff / 2)
+      height += diff
+    }
+    return new Rect(x, y, width, height)
+  }
   public floor(): Rect {
     return new Rect(
       Math.floor(this.x),
...
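For illustration, here is how the new toSquare behaves on a non-square rect (values worked out from the code above, not taken from the commit):

// a 100 x 200 rect grows to 200 x 200 and is shifted left by 50 px so it stays centered
const rect = new Rect(10, 20, 100, 200)
const squared = rect.toSquare()
// squared equals new Rect(-40, 20, 200, 200)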
@@ -4,9 +4,11 @@ import { convLayer } from '../commons/convLayer';
 import { getImageTensor } from '../commons/getImageTensor';
 import { ConvParams } from '../commons/types';
 import { NetInput } from '../NetInput';
+import { padToSquare } from '../padToSquare';
 import { Point } from '../Point';
 import { toNetInput } from '../toNetInput';
 import { Dimensions, TNetInput } from '../types';
+import { isEven } from '../utils';
 import { extractParams } from './extractParams';
 import { FaceLandmarks } from './FaceLandmarks';
 import { fullyConnectedLayer } from './fullyConnectedLayer';
@@ -41,31 +43,25 @@ export class FaceLandmarkNet {
     this._params = extractParams(weights)
   }
-  public async detectLandmarks(input: tf.Tensor | NetInput | TNetInput) {
-    if (!this._params) {
+  public forwardTensor(imgTensor: tf.Tensor4D): tf.Tensor2D {
+    const params = this._params
+    if (!params) {
       throw new Error('FaceLandmarkNet - load model before inference')
     }
-    const netInput = input instanceof tf.Tensor
-      ? input
-      : await toNetInput(input)
-    let imageDimensions: Dimensions | undefined
-    const outTensor = tf.tidy(() => {
-      const params = this._params
-      let imgTensor = getImageTensor(netInput)
-      const [height, width] = imgTensor.shape.slice(1)
-      imageDimensions = { width, height }
+    return tf.tidy(() => {
+      const [batchSize, height, width] = imgTensor.shape.slice()
+      let x = padToSquare(imgTensor, true)
+      const [heightAfterPadding, widthAfterPadding] = x.shape.slice(1)
       // work with 128 x 128 sized face images
-      if (imgTensor.shape[1] !== 128 || imgTensor.shape[2] !== 128) {
-        imgTensor = tf.image.resizeBilinear(imgTensor, [128, 128])
+      if (heightAfterPadding !== 128 || widthAfterPadding !== 128) {
+        x = tf.image.resizeBilinear(x, [128, 128])
      }
-      let out = conv(imgTensor, params.conv0_params)
+      let out = conv(x, params.conv0_params)
       out = maxPool(out)
       out = conv(out, params.conv1_params)
       out = conv(out, params.conv2_params)
@@ -80,14 +76,58 @@ export class FaceLandmarkNet {
       const fc0 = tf.relu(fullyConnectedLayer(out.as2D(out.shape[0], -1), params.fc0_params))
       const fc1 = fullyConnectedLayer(fc0, params.fc1_params)
-      return fc1
+      const createInterleavedTensor = (fillX: number, fillY: number) =>
+        tf.stack([
+          tf.fill([68], fillX),
+          tf.fill([68], fillY)
+        ], 1).as2D(batchSize, 136)
+      /* shift coordinates back, to undo centered padding
+        ((x * widthAfterPadding) - shiftX) / width
+        ((y * heightAfterPadding) - shiftY) / height
+      */
+      const shiftX = Math.floor(Math.abs(widthAfterPadding - width) / 2)
+      const shiftY = Math.floor(Math.abs(heightAfterPadding - height) / 2)
+      const landmarkTensor = fc1
+        .mul(createInterleavedTensor(widthAfterPadding, heightAfterPadding))
+        .sub(createInterleavedTensor(shiftX, shiftY))
+        .div(createInterleavedTensor(width, height))
+      return landmarkTensor as tf.Tensor2D
     })
+  }
+  public async forward(input: tf.Tensor | NetInput | TNetInput): Promise<tf.Tensor2D> {
+    const netInput = input instanceof tf.Tensor
+      ? input
+      : await toNetInput(input)
+    return this.forwardTensor(getImageTensor(netInput))
+  }
+  public async detectLandmarks(input: tf.Tensor | NetInput | TNetInput) {
+    const netInput = input instanceof tf.Tensor
+      ? input
+      : await toNetInput(input)
+    let imageDimensions: Dimensions | undefined
+    const outTensor = tf.tidy(() => {
+      const imgTensor = getImageTensor(netInput)
+      const [height, width] = imgTensor.shape.slice(1)
+      imageDimensions = { width, height }
+      return this.forwardTensor(imgTensor)
+    })
     const faceLandmarksArray = Array.from(await outTensor.data())
     outTensor.dispose()
-    const xCoords = faceLandmarksArray.filter((c, i) => (i - 1) % 2)
-    const yCoords = faceLandmarksArray.filter((c, i) => i % 2)
+    const xCoords = faceLandmarksArray.filter((_, i) => isEven(i))
+    const yCoords = faceLandmarksArray.filter((_, i) => !isEven(i))
     return new FaceLandmarks(
       Array(68).fill(0).map((_, i) => new Point(xCoords[i], yCoords[i])),
...
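The coordinate correction documented in the comment above can be sanity-checked with a small numeric sketch (the image dimensions are made up for illustration):

// original image: 100 x 200, padded by padToSquare to a centered 200 x 200 square
const width = 100, height = 200
const widthAfterPadding = 200, heightAfterPadding = 200
const shiftX = Math.floor(Math.abs(widthAfterPadding - width) / 2)   // 50
const shiftY = Math.floor(Math.abs(heightAfterPadding - height) / 2) // 0
// the net predicts coordinates relative to the padded square; undo the padding:
const xPadded = 0.6
const xOriginal = (xPadded * widthAfterPadding - shiftX) / width     // (120 - 50) / 100 = 0.7
const yPadded = 0.25
const yOriginal = (yPadded * heightAfterPadding - shiftY) / height   // (50 - 0) / 200 = 0.25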
@@ -31,7 +31,10 @@ describe('faceLandmarkNet', () => {
       expect(result.getImageHeight()).toEqual(height)
       expect(result.getShift().x).toEqual(0)
       expect(result.getShift().y).toEqual(0)
-      expect(result.getPositions().map(({ x, y }) => ({ x, y }))).toEqual(faceLandmarkPositions)
+      result.getPositions().forEach(({ x, y }, i) => {
+        expectMaxDelta(x, faceLandmarkPositions[i].x, 0.1)
+        expectMaxDelta(y, faceLandmarkPositions[i].y, 0.1)
+      })
     })
   })
...