Commit 554bbe05 by vincent

pad input of face landmark net to square and center to avoid stretching of non square images

parent fd698812
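Note: the padToSquare helper that the landmark net now calls is imported below, but its implementation is not part of this diff. As a rough, hypothetical sketch (the exact signature, the zero-value padding and the meaning of the second argument are assumptions inferred only from how the helper is called here), centered padding of a batched image tensor to a square could look like this:

import * as tf from '@tensorflow/tfjs-core';

// Hypothetical sketch only, not the implementation shipped with this commit:
// pad a [batch, height, width, channels] tensor with zeros along its shorter
// dimension until it is square; when isCenterImage is true, split the padding
// evenly before and after so the image content stays centered.
export function padToSquare(imgTensor: tf.Tensor4D, isCenterImage: boolean = false): tf.Tensor4D {
  return tf.tidy(() => {
    const [, height, width] = imgTensor.shape
    if (height === width) {
      return imgTensor
    }
    const diff = Math.abs(height - width)
    const before = isCenterImage ? Math.floor(diff / 2) : 0
    const after = diff - before
    // pad rows if the image is wider than tall, otherwise pad columns
    const paddings: Array<[number, number]> = height < width
      ? [[0, 0], [before, after], [0, 0], [0, 0]]
      : [[0, 0], [0, 0], [before, after], [0, 0]]
    return tf.pad(imgTensor, paddings)
  })
}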
@@ -18,6 +18,20 @@ export class Rect implements IRect {
     this.height = height
   }
+  public toSquare(): Rect {
+    let { x, y, width, height } = this
+    const diff = Math.abs(width - height)
+    if (width < height) {
+      x -= (diff / 2)
+      width += diff
+    }
+    if (height < width) {
+      y -= (diff / 2)
+      height += diff
+    }
+    return new Rect(x, y, width, height)
+  }
   public floor(): Rect {
     return new Rect(
       Math.floor(this.x),
...
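For illustration, here is how the new toSquare behaves on a non-square rect (values worked out from the code above, not taken from the commit):

// a 100 x 200 rect grows to 200 x 200 and is shifted left by 50 px so it stays centered
const rect = new Rect(10, 20, 100, 200)
const squared = rect.toSquare()
// squared equals new Rect(-40, 20, 200, 200)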
@@ -4,9 +4,11 @@ import { convLayer } from '../commons/convLayer';
 import { getImageTensor } from '../commons/getImageTensor';
 import { ConvParams } from '../commons/types';
 import { NetInput } from '../NetInput';
+import { padToSquare } from '../padToSquare';
 import { Point } from '../Point';
 import { toNetInput } from '../toNetInput';
 import { Dimensions, TNetInput } from '../types';
+import { isEven } from '../utils';
 import { extractParams } from './extractParams';
 import { FaceLandmarks } from './FaceLandmarks';
 import { fullyConnectedLayer } from './fullyConnectedLayer';
@@ -41,31 +43,25 @@ export class FaceLandmarkNet {
     this._params = extractParams(weights)
   }
-  public async detectLandmarks(input: tf.Tensor | NetInput | TNetInput) {
-    if (!this._params) {
+  public forwardTensor(imgTensor: tf.Tensor4D): tf.Tensor2D {
+    const params = this._params
+    if (!params) {
       throw new Error('FaceLandmarkNet - load model before inference')
     }
-    const netInput = input instanceof tf.Tensor
-      ? input
-      : await toNetInput(input)
-    let imageDimensions: Dimensions | undefined
-    const outTensor = tf.tidy(() => {
-      const params = this._params
-      let imgTensor = getImageTensor(netInput)
-      const [height, width] = imgTensor.shape.slice(1)
-      imageDimensions = { width, height }
+    return tf.tidy(() => {
+      const [batchSize, height, width] = imgTensor.shape.slice()
+      let x = padToSquare(imgTensor, true)
+      const [heightAfterPadding, widthAfterPadding] = x.shape.slice(1)
       // work with 128 x 128 sized face images
-      if (imgTensor.shape[1] !== 128 || imgTensor.shape[2] !== 128) {
-        imgTensor = tf.image.resizeBilinear(imgTensor, [128, 128])
+      if (heightAfterPadding !== 128 || widthAfterPadding !== 128) {
+        x = tf.image.resizeBilinear(x, [128, 128])
      }
-      let out = conv(imgTensor, params.conv0_params)
+      let out = conv(x, params.conv0_params)
       out = maxPool(out)
       out = conv(out, params.conv1_params)
       out = conv(out, params.conv2_params)
@@ -80,14 +76,58 @@ export class FaceLandmarkNet {
       const fc0 = tf.relu(fullyConnectedLayer(out.as2D(out.shape[0], -1), params.fc0_params))
       const fc1 = fullyConnectedLayer(fc0, params.fc1_params)
-      return fc1
+      const createInterleavedTensor = (fillX: number, fillY: number) =>
+        tf.stack([
+          tf.fill([68], fillX),
+          tf.fill([68], fillY)
+        ], 1).as2D(batchSize, 136)
+      /* shift coordinates back, to undo centered padding
+        ((x * widthAfterPadding) - shiftX) / width
+        ((y * heightAfterPadding) - shiftY) / height
+      */
+      const shiftX = Math.floor(Math.abs(widthAfterPadding - width) / 2)
+      const shiftY = Math.floor(Math.abs(heightAfterPadding - height) / 2)
+      const landmarkTensor = fc1
+        .mul(createInterleavedTensor(widthAfterPadding, heightAfterPadding))
+        .sub(createInterleavedTensor(shiftX, shiftY))
+        .div(createInterleavedTensor(width, height))
+      return landmarkTensor as tf.Tensor2D
     })
+  }
+  public async forward(input: tf.Tensor | NetInput | TNetInput): Promise<tf.Tensor2D> {
+    const netInput = input instanceof tf.Tensor
+      ? input
+      : await toNetInput(input)
+    return this.forwardTensor(getImageTensor(netInput))
+  }
+  public async detectLandmarks(input: tf.Tensor | NetInput | TNetInput) {
+    const netInput = input instanceof tf.Tensor
+      ? input
+      : await toNetInput(input)
+    let imageDimensions: Dimensions | undefined
+    const outTensor = tf.tidy(() => {
+      const imgTensor = getImageTensor(netInput)
+      const [height, width] = imgTensor.shape.slice(1)
+      imageDimensions = { width, height }
+      return this.forwardTensor(imgTensor)
+    })
     const faceLandmarksArray = Array.from(await outTensor.data())
     outTensor.dispose()
-    const xCoords = faceLandmarksArray.filter((c, i) => (i - 1) % 2)
-    const yCoords = faceLandmarksArray.filter((c, i) => i % 2)
+    const xCoords = faceLandmarksArray.filter((_, i) => isEven(i))
+    const yCoords = faceLandmarksArray.filter((_, i) => !isEven(i))
     return new FaceLandmarks(
       Array(68).fill(0).map((_, i) => new Point(xCoords[i], yCoords[i])),
...
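The coordinate correction documented in the comment above can be sanity-checked with a small numeric sketch (the image dimensions are made up for illustration):

// original image: 100 x 200, padded by padToSquare to a centered 200 x 200 square
const width = 100, height = 200
const widthAfterPadding = 200, heightAfterPadding = 200
const shiftX = Math.floor(Math.abs(widthAfterPadding - width) / 2)   // 50
const shiftY = Math.floor(Math.abs(heightAfterPadding - height) / 2) // 0
// the net predicts coordinates relative to the padded square; undo the padding:
const xPadded = 0.6
const xOriginal = (xPadded * widthAfterPadding - shiftX) / width     // (120 - 50) / 100 = 0.7
const yPadded = 0.25
const yOriginal = (yPadded * heightAfterPadding - shiftY) / height   // (50 - 0) / 200 = 0.25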
@@ -31,7 +31,10 @@ describe('faceLandmarkNet', () => {
       expect(result.getImageHeight()).toEqual(height)
       expect(result.getShift().x).toEqual(0)
       expect(result.getShift().y).toEqual(0)
-      expect(result.getPositions().map(({ x, y }) => ({ x, y }))).toEqual(faceLandmarkPositions)
+      result.getPositions().forEach(({ x, y }, i) => {
+        expectMaxDelta(x, faceLandmarkPositions[i].x, 0.1)
+        expectMaxDelta(y, faceLandmarkPositions[i].y, 0.1)
+      })
     })
   })
...