Commit 554bbe05 by vincent

Pad the input of the face landmark net to a centered square to avoid stretching non-square images

parent fd698812
...@@ -18,6 +18,20 @@ export class Rect implements IRect { ...@@ -18,6 +18,20 @@ export class Rect implements IRect {
this.height = height this.height = height
} }
/**
 * Returns a new square Rect that encloses this one and shares its center.
 *
 * The shorter side is extended to the length of the longer side, and the
 * origin along that axis is moved back by half of the added length, so the
 * extra space is split evenly on both sides. This Rect is not modified.
 * A Rect that is already square is returned as an unchanged copy.
 *
 * @returns A new Rect whose width and height are equal.
 */
public toSquare(): Rect {
  let { x, y, width, height } = this

  // The branches are mutually exclusive and the longer side is assigned
  // directly. Computing `shorter + diff` can round to a value slightly above
  // the longer side, which previously let the second adjustment run as well
  // and produced a non-square result.
  if (width < height) {
    x -= (height - width) / 2
    width = height
  } else if (height < width) {
    y -= (width - height) / 2
    height = width
  }

  return new Rect(x, y, width, height)
}
public floor(): Rect { public floor(): Rect {
return new Rect( return new Rect(
Math.floor(this.x), Math.floor(this.x),
......
...@@ -4,9 +4,11 @@ import { convLayer } from '../commons/convLayer'; ...@@ -4,9 +4,11 @@ import { convLayer } from '../commons/convLayer';
import { getImageTensor } from '../commons/getImageTensor'; import { getImageTensor } from '../commons/getImageTensor';
import { ConvParams } from '../commons/types'; import { ConvParams } from '../commons/types';
import { NetInput } from '../NetInput'; import { NetInput } from '../NetInput';
import { padToSquare } from '../padToSquare';
import { Point } from '../Point'; import { Point } from '../Point';
import { toNetInput } from '../toNetInput'; import { toNetInput } from '../toNetInput';
import { Dimensions, TNetInput } from '../types'; import { Dimensions, TNetInput } from '../types';
import { isEven } from '../utils';
import { extractParams } from './extractParams'; import { extractParams } from './extractParams';
import { FaceLandmarks } from './FaceLandmarks'; import { FaceLandmarks } from './FaceLandmarks';
import { fullyConnectedLayer } from './fullyConnectedLayer'; import { fullyConnectedLayer } from './fullyConnectedLayer';
...@@ -41,31 +43,25 @@ export class FaceLandmarkNet { ...@@ -41,31 +43,25 @@ export class FaceLandmarkNet {
this._params = extractParams(weights) this._params = extractParams(weights)
} }
public async detectLandmarks(input: tf.Tensor | NetInput | TNetInput) { public forwardTensor(imgTensor: tf.Tensor4D): tf.Tensor2D {
if (!this._params) { const params = this._params
if (!params) {
throw new Error('FaceLandmarkNet - load model before inference') throw new Error('FaceLandmarkNet - load model before inference')
} }
const netInput = input instanceof tf.Tensor return tf.tidy(() => {
? input const [batchSize, height, width] = imgTensor.shape.slice()
: await toNetInput(input)
let imageDimensions: Dimensions | undefined
const outTensor = tf.tidy(() => {
const params = this._params
let imgTensor = getImageTensor(netInput)
const [height, width] = imgTensor.shape.slice(1)
imageDimensions = { width, height }
let x = padToSquare(imgTensor, true)
const [heightAfterPadding, widthAfterPadding] = x.shape.slice(1)
// work with 128 x 128 sized face images // work with 128 x 128 sized face images
if (imgTensor.shape[1] !== 128 || imgTensor.shape[2] !== 128) { if (heightAfterPadding !== 128 || widthAfterPadding !== 128) {
imgTensor = tf.image.resizeBilinear(imgTensor, [128, 128]) x = tf.image.resizeBilinear(x, [128, 128])
} }
let out = conv(imgTensor, params.conv0_params) let out = conv(x, params.conv0_params)
out = maxPool(out) out = maxPool(out)
out = conv(out, params.conv1_params) out = conv(out, params.conv1_params)
out = conv(out, params.conv2_params) out = conv(out, params.conv2_params)
...@@ -80,14 +76,58 @@ export class FaceLandmarkNet { ...@@ -80,14 +76,58 @@ export class FaceLandmarkNet {
const fc0 = tf.relu(fullyConnectedLayer(out.as2D(out.shape[0], -1), params.fc0_params)) const fc0 = tf.relu(fullyConnectedLayer(out.as2D(out.shape[0], -1), params.fc0_params))
const fc1 = fullyConnectedLayer(fc0, params.fc1_params) const fc1 = fullyConnectedLayer(fc0, params.fc1_params)
return fc1
const createInterleavedTensor = (fillX: number, fillY: number) =>
tf.stack([
tf.fill([68], fillX),
tf.fill([68], fillY)
], 1).as2D(batchSize, 136)
/* shift coordinates back, to undo centered padding
((x * widthAfterPadding) - shiftX) / width
((y * heightAfterPadding) - shiftY) / height
*/
const shiftX = Math.floor(Math.abs(widthAfterPadding - width) / 2)
const shiftY = Math.floor(Math.abs(heightAfterPadding - height) / 2)
const landmarkTensor = fc1
.mul(createInterleavedTensor(widthAfterPadding, heightAfterPadding))
.sub(createInterleavedTensor(shiftX, shiftY))
.div(createInterleavedTensor(width, height))
return landmarkTensor as tf.Tensor2D
})
}
/**
 * Runs the landmark net on an arbitrary supported input.
 *
 * A tf.Tensor is used as-is; any other input is first converted with
 * toNetInput. The result is turned into an image tensor via getImageTensor
 * and passed to forwardTensor.
 *
 * @param input A tensor, a NetInput, or any input accepted by toNetInput.
 * @returns The tensor produced by forwardTensor for this input.
 */
public async forward(input: tf.Tensor | NetInput | TNetInput): Promise<tf.Tensor2D> {
  let resolvedInput
  if (input instanceof tf.Tensor) {
    resolvedInput = input
  } else {
    resolvedInput = await toNetInput(input)
  }

  const imgTensor = getImageTensor(resolvedInput)
  return this.forwardTensor(imgTensor)
}
public async detectLandmarks(input: tf.Tensor | NetInput | TNetInput) {
const netInput = input instanceof tf.Tensor
? input
: await toNetInput(input)
let imageDimensions: Dimensions | undefined
const outTensor = tf.tidy(() => {
const imgTensor = getImageTensor(netInput)
const [height, width] = imgTensor.shape.slice(1)
imageDimensions = { width, height }
return this.forwardTensor(imgTensor)
}) })
const faceLandmarksArray = Array.from(await outTensor.data()) const faceLandmarksArray = Array.from(await outTensor.data())
outTensor.dispose() outTensor.dispose()
const xCoords = faceLandmarksArray.filter((c, i) => (i - 1) % 2) const xCoords = faceLandmarksArray.filter((_, i) => isEven(i))
const yCoords = faceLandmarksArray.filter((c, i) => i % 2) const yCoords = faceLandmarksArray.filter((_, i) => !isEven(i))
return new FaceLandmarks( return new FaceLandmarks(
Array(68).fill(0).map((_, i) => new Point(xCoords[i], yCoords[i])), Array(68).fill(0).map((_, i) => new Point(xCoords[i], yCoords[i])),
......
...@@ -31,7 +31,10 @@ describe('faceLandmarkNet', () => { ...@@ -31,7 +31,10 @@ describe('faceLandmarkNet', () => {
expect(result.getImageHeight()).toEqual(height) expect(result.getImageHeight()).toEqual(height)
expect(result.getShift().x).toEqual(0) expect(result.getShift().x).toEqual(0)
expect(result.getShift().y).toEqual(0) expect(result.getShift().y).toEqual(0)
expect(result.getPositions().map(({ x, y }) => ({ x, y }))).toEqual(faceLandmarkPositions) result.getPositions().forEach(({ x, y }, i) => {
expectMaxDelta(x, faceLandmarkPositions[i].x, 0.1)
expectMaxDelta(y, faceLandmarkPositions[i].y, 0.1)
})
}) })
}) })
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment