Commit 5753e5d0 by vincent

added handling of batch inputs + face landmark net works with batch inputs now

parent 1c89e90a
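The gist of the change: all nets now accept batched inputs (arrays of media elements or tf.Tensor3Ds, or a stacked tf.Tensor4D), and the face landmark net runs a whole batch through a single forward pass. A minimal usage sketch of the batched API (the image elements are hypothetical; models are assumed to be loaded):

// detectLandmarks returns a single FaceLandmarks for a single input
// and a FaceLandmarks[] for a batch of inputs
const img1 = document.getElementById('face1')
const img2 = document.getElementById('face2')
const landmarksByFace = await faceapi.detectLandmarks([img1, img2])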
@@ -84,9 +84,12 @@
   const detections = await faceapi.locateFaces(input, minConfidence)
   faceapi.drawDetection('overlay', detections.map(det => det.forSize(width, height)))
-  const faceImages = await faceapi.extractFaces(input.canvases[0], detections)
+  const faceImages = await faceapi.extractFaces(input.inputs[0], detections)
   $('#facesContainer').empty()
   faceImages.forEach(canvas => $('#facesContainer').append(canvas))
+
+  // free memory for input tensors
+  input.dispose()
 }

 async function onSelectionChanged(uri) {
...
@@ -103,6 +103,9 @@
   faceapi.drawLandmarks(canvas, landmarksByFace, { lineWidth: drawLines ? 2 : 4, drawLines, color: 'red' })
   faceapi.drawDetection('overlay', locations.map(det => det.forSize(width, height)))
+
+  // free memory for input tensors
+  input.dispose()
 }

 async function run() {
...
@@ -84,7 +84,7 @@
   const input = await faceapi.toNetInput(inputImgEl)
   const locations = await faceapi.locateFaces(input, minConfidence)
-  const faceImages = await faceapi.extractFaces(input.canvases[0], locations)
+  const faceImages = await faceapi.extractFaces(input.inputs[0], locations)

   // detect landmarks and get the aligned face image bounding boxes
   const alignedFaceBoxes = await Promise.all(faceImages.map(
@@ -93,7 +93,10 @@
       return faceLandmarks.align(locations[i])
     }
   ))
-  const alignedFaceImages = await faceapi.extractFaces(input.canvases[0], alignedFaceBoxes)
+  const alignedFaceImages = await faceapi.extractFaces(input.inputs[0], alignedFaceBoxes)
+
+  // free memory for input tensors
+  input.dispose()

   $('#facesContainer').empty()
   faceImages.forEach(async (faceCanvas, i) => {
...
@@ -72,14 +72,13 @@
   if(videoEl.paused || videoEl.ended || !modelLoaded)
     return false

-  const input = await faceapi.toNetInput(videoEl)
-  const { width, height } = input
+  const { width, height } = faceapi.getMediaDimensions(videoEl)
   const canvas = $('#overlay').get(0)
   canvas.width = width
   canvas.height = height

   const ts = Date.now()
-  result = await faceapi.locateFaces(input, minConfidence)
+  result = await faceapi.locateFaces(videoEl, minConfidence)
   displayTimeStats(Date.now() - ts)

   faceapi.drawDetection('overlay', result.map(det => det.forSize(width, height)))
...
-import { Dimensions, TMediaElement } from './types';
-import { createCanvasFromMedia } from './utils';
-
-export class NetInput {
-  private _canvases: HTMLCanvasElement[]
-
-  constructor(
-    medias: Array<TMediaElement>,
-    dims?: Dimensions
-  ) {
-    this._canvases = []
-    medias.forEach(m => this.initCanvas(m, dims))
-  }
-
-  private initCanvas(media: TMediaElement, dims?: Dimensions) {
-    if (media instanceof HTMLCanvasElement) {
-      this._canvases.push(media)
-      return
-    }
-    this._canvases.push(createCanvasFromMedia(media, dims))
-  }
-
-  public get canvases() : HTMLCanvasElement[] {
-    return this._canvases
-  }
-}
\ No newline at end of file
+import * as tf from '@tensorflow/tfjs-core';
+
+import { isTensor3D, isTensor4D } from './commons/isTensor';
+import { padToSquare } from './padToSquare';
+import { Point } from './Point';
+import { TResolvedNetInput } from './types';
+import { createCanvasFromMedia } from './utils';
+
+export class NetInput {
+  private _inputs: tf.Tensor3D[] = []
+  private _isManaged: boolean = false
+  private _inputDimensions: number[][] = []
+  private _paddings: Point[] = []
+
+  constructor(inputs: tf.Tensor4D | Array<TResolvedNetInput>) {
+    if (isTensor4D(inputs)) {
+      this._inputs = tf.unstack(inputs as tf.Tensor4D) as tf.Tensor3D[]
+    }
+
+    if (Array.isArray(inputs)) {
+      this._inputs = inputs.map(input => {
+        if (isTensor3D(input)) {
+          // TODO: make sure not to dispose original tensors passed in by the user
+          return tf.clone(input as tf.Tensor3D)
+        }
+        return tf.fromPixels(
+          input instanceof HTMLCanvasElement ? input : createCanvasFromMedia(input as HTMLImageElement | HTMLVideoElement)
+        )
+      })
+    }
+
+    this._inputDimensions = this._inputs.map(t => t.shape)
+  }
+
+  public get inputs(): tf.Tensor3D[] {
+    return this._inputs
+  }
+
+  public get isManaged(): boolean {
+    return this._isManaged
+  }
+
+  public get batchSize(): number {
+    return this._inputs.length
+  }
+
+  public get inputDimensions(): number[][] {
+    return this._inputDimensions
+  }
+
+  public get paddings(): Point[] {
+    return this._paddings
+  }
+
+  public getInputDimensions(batchIdx: number): number[] {
+    return this._inputDimensions[batchIdx]
+  }
+
+  public getInputHeight(batchIdx: number): number {
+    return this._inputDimensions[batchIdx][0]
+  }
+
+  public getInputWidth(batchIdx: number): number {
+    return this._inputDimensions[batchIdx][1]
+  }
+
+  public getPaddings(batchIdx: number): Point {
+    return this._paddings[batchIdx]
+  }
+
+  public toBatchTensor(inputSize: number, isCenterInputs: boolean = true): tf.Tensor4D {
+    return tf.tidy(() => {
+      const inputTensors = this._inputs.map((inputTensor: tf.Tensor3D) => {
+        const [originalHeight, originalWidth] = inputTensor.shape
+
+        let imgTensor = inputTensor.expandDims().toFloat() as tf.Tensor4D
+        imgTensor = padToSquare(imgTensor, isCenterInputs)
+
+        const [heightAfterPadding, widthAfterPadding] = imgTensor.shape.slice(1)
+        if (heightAfterPadding !== inputSize || widthAfterPadding !== inputSize) {
+          imgTensor = tf.image.resizeBilinear(imgTensor, [inputSize, inputSize])
+        }
+
+        this._paddings.push(new Point(
+          widthAfterPadding - originalWidth,
+          heightAfterPadding - originalHeight
+        ))
+
+        return imgTensor
+      })
+
+      const batchTensor = tf.stack(inputTensors).as4D(this.batchSize, inputSize, inputSize, 3)
+
+      if (this.isManaged) {
+        this.dispose()
+      }
+
+      return batchTensor
+    })
+  }
+
+  /**
+   * By setting the isManaged flag, all newly created tensors will be
+   * automatically disposed after the batch tensor has been created.
+   */
+  public managed() {
+    this._isManaged = true
+    return this
+  }
+
+  public dispose() {
+    this._inputs.forEach(t => t.dispose())
+  }
+}
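A sketch of the new NetInput lifecycle as the nets use it internally (img1 and img2 are hypothetical):

// toNetInput(inputs, true) marks the NetInput as managed, so the tensors it
// created are disposed automatically once toBatchTensor has built the batch
const netInput = await toNetInput([img1, img2], true)
const batchTensor = netInput.toBatchTensor(128, true) // shape [2, 128, 128, 3]
// only the returned batch tensor is left for the caller to dispose
batchTensor.dispose()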
-import * as tf from '@tensorflow/tfjs-core';
-
 import { extractFaceTensors } from './extractFaceTensors';
 import { FaceDetectionNet } from './faceDetectionNet/FaceDetectionNet';
 import { FaceLandmarkNet } from './faceLandmarkNet/FaceLandmarkNet';
 import { FaceLandmarks } from './faceLandmarkNet/FaceLandmarks';
 import { FaceRecognitionNet } from './faceRecognitionNet/FaceRecognitionNet';
 import { FullFaceDescription } from './FullFaceDescription';
-import { NetInput } from './NetInput';
 import { TNetInput } from './types';

 export function allFacesFactory(
@@ -15,22 +12,25 @@ export function allFacesFactory(
   recognitionNet: FaceRecognitionNet
 ) {
   return async function(
-    input: tf.Tensor | NetInput | TNetInput,
+    input: TNetInput,
     minConfidence: number
   ): Promise<FullFaceDescription[]> {
     const detections = await detectionNet.locateFaces(input, minConfidence)

     const faceTensors = await extractFaceTensors(input, detections)

+    /**
     const faceLandmarksByFace = await Promise.all(faceTensors.map(
       faceTensor => landmarkNet.detectLandmarks(faceTensor)
     )) as FaceLandmarks[]
+    */
+    const faceLandmarksByFace = await landmarkNet.detectLandmarks(faceTensors) as FaceLandmarks[]

     faceTensors.forEach(t => t.dispose())

-    const alignedFaceBoxes = await Promise.all(faceLandmarksByFace.map(
+    const alignedFaceBoxes = faceLandmarksByFace.map(
       (landmarks, i) => landmarks.align(detections[i].getBox())
-    ))
+    )

     const alignedFaceTensors = await extractFaceTensors(input, alignedFaceBoxes)

     const descriptors = await Promise.all(alignedFaceTensors.map(
...
-import * as tf from '@tensorflow/tfjs-core';
-
-import { NetInput } from '../NetInput';
-import { tensorTo4D } from './tensorTo4D';
-
-export function getImageTensor(input: tf.Tensor | NetInput): tf.Tensor4D {
-  return tf.tidy(() => {
-    if (input instanceof tf.Tensor) {
-      return tensorTo4D(input)
-    }
-
-    if (!(input instanceof NetInput)) {
-      throw new Error('getImageTensor - expected input to be a tensor or an instance of NetInput')
-    }
-
-    if (input.canvases.length > 1) {
-      throw new Error('getImageTensor - batch input is not accepted here')
-    }
-
-    return tf.fromPixels(input.canvases[0]).expandDims(0).toFloat() as tf.Tensor4D
-  })
-}
\ No newline at end of file
+export function isMediaElement(input: any) {
+  return input instanceof HTMLImageElement
+    || input instanceof HTMLVideoElement
+    || input instanceof HTMLCanvasElement
+}
\ No newline at end of file
 import * as tf from '@tensorflow/tfjs-core';

-export function isTensor(tensor: tf.Tensor, dim: number) {
+export function isTensor(tensor: any, dim: number) {
   return tensor instanceof tf.Tensor && tensor.shape.length === dim
 }

-export function isTensor1D(tensor: tf.Tensor) {
+export function isTensor1D(tensor: any) {
   return isTensor(tensor, 1)
 }

-export function isTensor2D(tensor: tf.Tensor) {
+export function isTensor2D(tensor: any) {
   return isTensor(tensor, 2)
 }

-export function isTensor3D(tensor: tf.Tensor) {
+export function isTensor3D(tensor: any) {
   return isTensor(tensor, 3)
 }

-export function isTensor4D(tensor: tf.Tensor) {
+export function isTensor4D(tensor: any) {
   return isTensor(tensor, 4)
 }
\ No newline at end of file
-import * as tf from '@tensorflow/tfjs-core';
-
-import { NetInput } from '../NetInput';
-import { padToSquare } from '../padToSquare';
-import { tensorTo4D } from './tensorTo4D';
-import { BatchReshapeInfo } from './types';
-
-export function toInputTensor(
-  input: tf.Tensor | tf.Tensor[] | NetInput,
-  inputSize: number,
-  center: boolean = true
-): { batchTensor: tf.Tensor4D, batchInfo: BatchReshapeInfo[] } {
-  if (!(input instanceof tf.Tensor) && !(input instanceof NetInput)) {
-    throw new Error('toInputTensor - expected input to be a tensor of an instance of NetInput')
-  }
-
-  return tf.tidy(() => {
-    const inputTensors = input instanceof NetInput
-      ? input.canvases.map(c => tf.expandDims(tf.fromPixels(c)))
-      : [tensorTo4D(input)]
-
-    const preprocessedTensors: tf.Tensor4D[] = []
-    const batchInfo: BatchReshapeInfo[] = []
-
-    inputTensors.forEach((inputTensor: tf.Tensor4D) => {
-      const [originalHeight, originalWidth] = inputTensor.shape.slice(1)
-
-      let imgTensor = padToSquare(inputTensor.toFloat(), center)
-      const [heightAfterPadding, widthAfterPadding] = imgTensor.shape.slice(1)
-      if (heightAfterPadding !== inputSize || widthAfterPadding !== inputSize) {
-        imgTensor = tf.image.resizeBilinear(imgTensor, [inputSize, inputSize])
-      }
-
-      preprocessedTensors.push(imgTensor)
-      batchInfo.push({
-        originalWidth,
-        originalHeight,
-        paddingX: widthAfterPadding - originalWidth,
-        paddingY: heightAfterPadding - originalHeight
-      })
-    })
-
-    const batchSize = inputTensors.length
-    return {
-      batchTensor: tf.stack(preprocessedTensors).as4D(batchSize, inputSize, inputSize, 3),
-      batchInfo
-    }
-  })
-}
\ No newline at end of file
 import { FaceDetection } from '../faceDetectionNet/FaceDetection';
 import { FaceLandmarks } from '../faceLandmarkNet/FaceLandmarks';
 import { Point } from '../Point';
-import { getContext2dOrThrow, getElement, round } from '../utils';
+import { getContext2dOrThrow, resolveInput, round } from '../utils';
 import { DrawBoxOptions, DrawLandmarksOptions, DrawOptions, DrawTextOptions } from './types';

 export function getDefaultDrawOptions(): DrawOptions {
@@ -55,7 +55,7 @@ export function drawDetection(
   detection: FaceDetection | FaceDetection[],
   options?: DrawBoxOptions & DrawTextOptions & { withScore: boolean }
 ) {
-  const canvas = getElement(canvasArg)
+  const canvas = resolveInput(canvasArg)
   if (!(canvas instanceof HTMLCanvasElement)) {
     throw new Error('drawBox - expected canvas to be of type: HTMLCanvasElement')
   }
@@ -132,7 +132,7 @@ export function drawLandmarks(
   faceLandmarks: FaceLandmarks | FaceLandmarks[],
   options?: DrawLandmarksOptions & { drawLines: boolean }
 ) {
-  const canvas = getElement(canvasArg)
+  const canvas = resolveInput(canvasArg)
   if (!(canvas instanceof HTMLCanvasElement)) {
     throw new Error('drawLandmarks - expected canvas to be of type: HTMLCanvasElement')
   }
...
 import * as tf from '@tensorflow/tfjs-core';

-import { getImageTensor } from './commons/getImageTensor';
 import { FaceDetection } from './faceDetectionNet/FaceDetection';
-import { NetInput } from './NetInput';
 import { Rect } from './Rect';
 import { toNetInput } from './toNetInput';
 import { TNetInput } from './types';
@@ -18,16 +16,21 @@ import { TNetInput } from './types';
  * @returns Tensors of the corresponding image region for each detected face.
  */
 export async function extractFaceTensors(
-  input: tf.Tensor | NetInput | TNetInput,
-  detections: Array<FaceDetection|Rect>
+  input: TNetInput,
+  detections: Array<FaceDetection | Rect>
 ): Promise<tf.Tensor4D[]> {
-  const image = input instanceof tf.Tensor
-    ? input
-    : await toNetInput(input)
+  const netInput = await toNetInput(input, true)
+
+  if (netInput.batchSize > 1) {
+    if (netInput.isManaged) {
+      netInput.dispose()
+    }
+    throw new Error('extractFaceTensors - batchSize > 1 not supported')
+  }

   return tf.tidy(() => {
-    const imgTensor = getImageTensor(image)
+    const imgTensor = netInput.inputs[0].expandDims().toFloat() as tf.Tensor4D
     const [imgHeight, imgWidth, numChannels] = imgTensor.shape.slice(1)
@@ -40,6 +43,9 @@ export async function extractFaceTensors(
       tf.slice(imgTensor, [0, y, x, 0], [1, height, width, numChannels])
     )

+    if (netInput.isManaged) {
+      netInput.dispose()
+    }
+
     return faceTensors
   })
 }
\ No newline at end of file
 import { FaceDetection } from './faceDetectionNet/FaceDetection';
 import { Rect } from './Rect';
-import { createCanvas, getContext2dOrThrow } from './utils';
+import { toNetInput } from './toNetInput';
+import { TNetInput } from './types';
+import { createCanvas, getContext2dOrThrow, imageTensorToCanvas } from './utils';

 /**
  * Extracts the image regions containing the detected faces.
@@ -9,15 +11,31 @@ import { createCanvas, getContext2dOrThrow } from './utils';
  * @param detections The face detection results or face bounding boxes for that image.
  * @returns The Canvases of the corresponding image region for each detected face.
  */
-export function extractFaces(
-  image: HTMLCanvasElement,
-  detections: Array<FaceDetection|Rect>
-): HTMLCanvasElement[] {
-  const ctx = getContext2dOrThrow(image)
+export async function extractFaces(
+  input: TNetInput,
+  detections: Array<FaceDetection | Rect>
+): Promise<HTMLCanvasElement[]> {
+
+  let canvas = input as HTMLCanvasElement
+
+  if (!(input instanceof HTMLCanvasElement)) {
+    const netInput = await toNetInput(input, true)
+
+    if (netInput.batchSize > 1) {
+      if (netInput.isManaged) {
+        netInput.dispose()
+      }
+      throw new Error('extractFaces - batchSize > 1 not supported')
+    }
+
+    canvas = await imageTensorToCanvas(netInput.inputs[0])
+  }
+
+  const ctx = getContext2dOrThrow(canvas)
   const boxes = detections.map(
     det => det instanceof FaceDetection
-      ? det.forSize(image.width, image.height).getBox().floor()
+      ? det.forSize(canvas.width, canvas.height).getBox().floor()
       : det
   )
   return boxes.map(({ x, y, width, height }) => {
...
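Note that extractFaces is now async, since non-canvas inputs are first rasterized via imageTensorToCanvas, so callers should await the result as the updated examples above do. A sketch with a tensor input (imgTensor3D and detections are hypothetical):

// crops the face regions out of a canvas rendered from the tensor
const faceCanvases = await faceapi.extractFaces(imgTensor3D, detections)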
 import * as tf from '@tensorflow/tfjs-core';

-import { getImageTensor } from '../commons/getImageTensor';
 import { NetInput } from '../NetInput';
-import { padToSquare } from '../padToSquare';
 import { Rect } from '../Rect';
 import { toNetInput } from '../toNetInput';
-import { Dimensions, TNetInput } from '../types';
+import { TNetInput } from '../types';
 import { extractParams } from './extractParams';
 import { FaceDetection } from './FaceDetection';
 import { loadQuantizedParams } from './loadQuantizedParams';
@@ -13,7 +11,6 @@ import { mobileNetV1 } from './mobileNetV1';
 import { nonMaxSuppression } from './nonMaxSuppression';
 import { outputLayer } from './outputLayer';
 import { predictionLayer } from './predictionLayer';
-import { resizeLayer } from './resizeLayer';
 import { NetParams } from './types';

 export class FaceDetectionNet {
@@ -36,15 +33,16 @@ export class FaceDetectionNet {
     this._params = extractParams(weights)
   }

-  private forwardTensor(imgTensor: tf.Tensor4D) {
+  public forwardInput(input: NetInput) {
     if (!this._params) {
       throw new Error('FaceDetectionNet - load model before inference')
     }

     return tf.tidy(() => {
+      const batchTensor = input.toBatchTensor(512, false)

-      const resized = resizeLayer(imgTensor) as tf.Tensor4D
-      const features = mobileNetV1(resized, this._params.mobilenetv1_params)
+      const x = tf.sub(tf.mul(batchTensor, tf.scalar(0.007843137718737125)), tf.scalar(1)) as tf.Tensor4D
+      const features = mobileNetV1(x, this._params.mobilenetv1_params)

       const {
         boxPredictions,
@@ -55,44 +53,23 @@ export class FaceDetectionNet {
     })
   }

-  public async forward(input: tf.Tensor | NetInput | TNetInput) {
-    const netInput = input instanceof tf.Tensor
-      ? input
-      : await toNetInput(input)
-
-    return tf.tidy(() =>
-      this.forwardTensor(padToSquare(getImageTensor(netInput)))
-    )
+  public async forward(input: TNetInput) {
+    return this.forwardInput(await toNetInput(input, true))
   }

   public async locateFaces(
-    input: tf.Tensor | NetInput | TNetInput,
+    input: TNetInput,
     minConfidence: number = 0.8,
     maxResults: number = 100,
   ): Promise<FaceDetection[]> {
-    const netInput = input instanceof tf.Tensor
-      ? input
-      : await toNetInput(input)
-
-    let paddedHeightRelative = 1, paddedWidthRelative = 1
-    let imageDimensions: Dimensions | undefined
+    const netInput = await toNetInput(input, true)

     const {
       boxes: _boxes,
       scores: _scores
-    } = tf.tidy(() => {
-      let imgTensor = getImageTensor(netInput)
-      const [height, width] = imgTensor.shape.slice(1)
-      imageDimensions = { width, height }
-
-      imgTensor = padToSquare(imgTensor)
-      paddedHeightRelative = imgTensor.shape[1] / height
-      paddedWidthRelative = imgTensor.shape[2] / width
-
-      return this.forwardTensor(imgTensor)
-    })
+    } = this.forwardInput(netInput)

     // TODO batches
     const boxes = _boxes[0]
@@ -114,6 +91,10 @@ export class FaceDetectionNet {
       minConfidence
     )

+    const paddedHeightRelative = (netInput.getPaddings(0).y + netInput.getInputHeight(0)) / netInput.getInputHeight(0)
+    const paddedWidthRelative = (netInput.getPaddings(0).x + netInput.getInputWidth(0)) / netInput.getInputWidth(0)
+
     const results = indices
       .map(idx => {
         const [top, bottom] = [
@@ -132,7 +113,10 @@ export class FaceDetectionNet {
           right - left,
           bottom - top
         ),
-        imageDimensions as Dimensions
+        {
+          height: netInput.getInputHeight(0),
+          width: netInput.getInputWidth(0)
+        }
       )
     })
...
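The relative padding factors, previously computed inside the tidy block, are now derived from the NetInput paddings. A worked example with hypothetical numbers:

// a 300x400 (width x height) input is padded to a 400x400 square by padToSquare,
// so netInput.getPaddings(0) is { x: 100, y: 0 } and
// paddedWidthRelative  = (100 + 300) / 300 ≈ 1.33
// paddedHeightRelative = (0 + 400) / 400 = 1.0
// boxes predicted on the padded square are scaled back by these factors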
-import * as tf from '@tensorflow/tfjs-core';
-
-const resizedImageSize = [512, 512] as [number, number]
-const weight = tf.scalar(0.007843137718737125)
-const bias = tf.scalar(1)
-
-export function resizeLayer(x: tf.Tensor4D) {
-  return tf.tidy(() => {
-    const resized = tf.image.resizeBilinear(x, resizedImageSize, false)
-    return tf.sub(tf.mul(resized, weight), bias)
-  })
-}
\ No newline at end of file
 import * as tf from '@tensorflow/tfjs-core';

 import { convLayer } from '../commons/convLayer';
-import { toInputTensor } from '../commons/toInputTensor';
 import { ConvParams } from '../commons/types';
 import { NetInput } from '../NetInput';
 import { Point } from '../Point';
@@ -42,7 +41,7 @@ export class FaceLandmarkNet {
     this._params = extractParams(weights)
   }

-  public forwardTensor(input: tf.Tensor | NetInput): tf.Tensor2D {
+  public forwardInput(input: NetInput): tf.Tensor2D {
     const params = this._params

     if (!params) {
@@ -50,7 +49,7 @@
     }

     return tf.tidy(() => {
-      const { batchTensor, batchInfo } = toInputTensor(input, 128, true)
+      const batchTensor = input.toBatchTensor(128, true)

       let out = conv(batchTensor, params.conv0_params)
       out = maxPool(out)
@@ -79,22 +78,22 @@
       */
       const landmarkTensors = fc1
-        .mul(tf.stack(batchInfo.map(info =>
+        .mul(tf.stack(Array.from(Array(input.batchSize), (_, batchIdx) =>
           createInterleavedTensor(
-            info.paddingX + info.originalWidth,
-            info.paddingY + info.originalHeight
+            input.getPaddings(batchIdx).x + input.getInputWidth(batchIdx),
+            input.getPaddings(batchIdx).y + input.getInputHeight(batchIdx)
           )
         )))
-        .sub(tf.stack(batchInfo.map(info =>
+        .sub(tf.stack(Array.from(Array(input.batchSize), (_, batchIdx) =>
           createInterleavedTensor(
-            Math.floor(info.paddingX / 2),
-            Math.floor(info.paddingY / 2)
+            Math.floor(input.getPaddings(batchIdx).x / 2),
+            Math.floor(input.getPaddings(batchIdx).y / 2)
           )
         )))
-        .div(tf.stack(batchInfo.map(info =>
+        .div(tf.stack(Array.from(Array(input.batchSize), (_, batchIdx) =>
           createInterleavedTensor(
-            info.originalWidth,
-            info.originalHeight
+            input.getInputWidth(batchIdx),
+            input.getInputHeight(batchIdx)
           )
         )))
@@ -102,40 +101,33 @@
     })
   }

-  public async forward(input: tf.Tensor | NetInput | TNetInput): Promise<tf.Tensor2D> {
-    const netInput = input instanceof tf.Tensor
-      ? input
-      : await toNetInput(input)
-
-    return this.forwardTensor(netInput)
+  public async forward(input: TNetInput): Promise<tf.Tensor2D> {
+    return this.forwardInput(await toNetInput(input, true))
   }

-  public async detectLandmarks(input: tf.Tensor | NetInput | TNetInput): Promise<FaceLandmarks | FaceLandmarks[]> {
-    const netInput = input instanceof tf.Tensor
-      ? input
-      : await toNetInput(input)
+  public async detectLandmarks(input: TNetInput): Promise<FaceLandmarks | FaceLandmarks[]> {
+    const netInput = await toNetInput(input, true)

-    const landmarkTensors = tf.unstack(this.forwardTensor(netInput))
+    const landmarkTensors = tf.unstack(this.forwardInput(netInput))

     const landmarksForBatch = await Promise.all(landmarkTensors.map(
       async (landmarkTensor, batchIdx) => {
         const landmarksArray = Array.from(await landmarkTensor.data())
-        landmarkTensor.dispose()

         const xCoords = landmarksArray.filter((_, i) => isEven(i))
         const yCoords = landmarksArray.filter((_, i) => !isEven(i))

-        const [height, width] = netInput instanceof tf.Tensor
-          ? netInput.shape.slice(1)
-          : [netInput.canvases[batchIdx].height, netInput.canvases[batchIdx].width]
-
         return new FaceLandmarks(
           Array(68).fill(0).map((_, i) => new Point(xCoords[i], yCoords[i])),
-          { height, width }
+          {
+            height: netInput.getInputHeight(batchIdx),
+            width : netInput.getInputWidth(batchIdx),
+          }
         )
       }
     ))

+    landmarkTensors.forEach(t => t.dispose())
+
     return landmarksForBatch.length === 1 ? landmarksForBatch[0] : landmarksForBatch
   }
 }
\ No newline at end of file
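With forwardInput unstacking one landmark tensor per batch element, detectLandmarks keeps its single-input behavior while supporting batches. A sketch given a FaceLandmarkNet instance landmarkNet and hypothetical inputs:

const single = await landmarkNet.detectLandmarks(img1)        // FaceLandmarks
const batch = await landmarkNet.detectLandmarks([img1, img2]) // FaceLandmarks[]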
 import * as tf from '@tensorflow/tfjs-core';

-import { getImageTensor } from '../commons/getImageTensor';
 import { NetInput } from '../NetInput';
-import { padToSquare } from '../padToSquare';
 import { toNetInput } from '../toNetInput';
 import { TNetInput } from '../types';
 import { convDown } from './convLayer';
@@ -32,25 +30,18 @@ export class FaceRecognitionNet {
     this._params = extractParams(weights)
   }

-  public async forward(input: tf.Tensor | NetInput | TNetInput): Promise<tf.Tensor2D> {
+  public async forwardInput(input: NetInput): Promise<tf.Tensor2D> {
     if (!this._params) {
       throw new Error('FaceRecognitionNet - load model before inference')
     }

-    const netInput = input instanceof tf.Tensor
-      ? input
-      : await toNetInput(input)
-
     return tf.tidy(() => {
-      let x = padToSquare(getImageTensor(netInput), true)
-      // work with 150 x 150 sized face images
-      if (x.shape[1] !== 150 || x.shape[2] !== 150) {
-        x = tf.image.resizeBilinear(x, [150, 150])
-      }
-      x = normalize(x)
+      const batchTensor = input.toBatchTensor(150, true)
+      const normalized = normalize(batchTensor)

-      let out = convDown(x, this._params.conv32_down)
+      let out = convDown(normalized, this._params.conv32_down)
       out = tf.maxPool(out, 3, 2, 'valid')
       out = residual(out, this._params.conv32_1)
@@ -77,13 +68,12 @@ export class FaceRecognitionNet {
       return fullyConnected
     })
   }

+  public async forward(input: TNetInput): Promise<tf.Tensor2D> {
+    return this.forwardInput(await toNetInput(input, true))
+  }

-  public async computeFaceDescriptor(input: tf.Tensor | NetInput | TNetInput) {
-    const netInput = input instanceof tf.Tensor
-      ? input
-      : await toNetInput(input)
-
-    const result = await this.forward(netInput)
+  public async computeFaceDescriptor(input: TNetInput) {
+    const result = await this.forward(await toNetInput(input, true))
     const data = await result.data()
     result.dispose()
     return data as Float32Array
...
@@ -35,7 +35,7 @@ export function loadModels(url: string) {
 }

 export function locateFaces(
-  input: tf.Tensor | NetInput | TNetInput,
+  input: TNetInput,
   minConfidence?: number,
   maxResults?: number
 ): Promise<FaceDetection[]> {
@@ -43,13 +43,13 @@ export function locateFaces(
 }

 export function detectLandmarks(
-  input: tf.Tensor | NetInput | TNetInput
+  input: TNetInput
 ): Promise<FaceLandmarks | FaceLandmarks[]> {
   return landmarkNet.detectLandmarks(input)
 }

 export function computeFaceDescriptor(
-  input: tf.Tensor | NetInput | TNetInput
+  input: TNetInput
 ): Promise<Float32Array> {
   return recognitionNet.computeFaceDescriptor(input)
 }
...
+import * as tf from '@tensorflow/tfjs-core';
+
+import { isMediaElement } from './commons/isMediaElement';
+import { isTensor3D, isTensor4D } from './commons/isTensor';
 import { NetInput } from './NetInput';
 import { TNetInput } from './types';
-import { awaitMediaLoaded, getElement } from './utils';
+import { awaitMediaLoaded, resolveInput } from './utils';

 /**
  * Validates the input to make sure, they are valid net inputs and awaits all media elements
  * to be finished loading.
  *
  * @param input The input, which can be a media element or an array of different media elements.
+ * @param manageCreatedInput If a new NetInput instance is created from the inputs, this flag
+ * determines, whether to set the NetInput as managed or not.
  * @returns A NetInput instance, which can be passed into one of the neural networks.
  */
 export async function toNetInput(
-  input: NetInput | TNetInput
+  inputs: TNetInput,
+  manageCreatedInput: boolean = false
 ): Promise<NetInput> {
-  if (input instanceof NetInput) {
-    return input
+  if (inputs instanceof NetInput) {
+    return inputs
   }

+  const afterCreate = (netInput: NetInput) => manageCreatedInput
+    ? netInput.managed()
+    : netInput
+
+  if (isTensor4D(inputs)) {
+    return afterCreate(new NetInput(inputs as tf.Tensor4D))
+  }
+
-  const mediaArgArray = Array.isArray(input)
-    ? input
-    : [input]
+  let inputArgArray = Array.isArray(inputs)
+    ? inputs
+    : [inputs]

-  if (!mediaArgArray.length) {
+  if (!inputArgArray.length) {
     throw new Error('toNetInput - empty array passed as input')
   }

-  const medias = mediaArgArray.map(getElement)
-
-  medias.forEach((media, i) => {
-    if (!(media instanceof HTMLImageElement || media instanceof HTMLVideoElement || media instanceof HTMLCanvasElement)) {
-      const idxHint = Array.isArray(input) ? ` at input index ${i}:` : ''
-      if (typeof mediaArgArray[i] === 'string') {
-        throw new Error(`toNetInput -${idxHint} string passed, but could not resolve HTMLElement for element id`)
-      }
-      throw new Error(`toNetInput -${idxHint} expected media to be of type HTMLImageElement | HTMLVideoElement | HTMLCanvasElement, or to be an element id`)
-    }
-  })
+  const getIdxHint = (idx: number) => Array.isArray(inputs) ? ` at input index ${idx}:` : ''
+
+  const inputArray = inputArgArray
+    .map(resolveInput)
+    .map((input, i) => {
+      if (isTensor4D(input)) {
+        // if tf.Tensor4D is passed in the input array, the batch size has to be 1
+        const batchSize = input.shape[0]
+        if (batchSize !== 1) {
+          throw new Error(`toNetInput -${getIdxHint(i)} tf.Tensor4D with batchSize ${batchSize} passed, but not supported in input array`)
+        }
+        // to tf.Tensor3D
+        return input.reshape(input.shape.slice(1))
+      }
+      return input
+    })
+
+  inputArray.forEach((input, i) => {
+    if (!isMediaElement(input) && !isTensor3D(input)) {
+      if (typeof inputArgArray[i] === 'string') {
+        throw new Error(`toNetInput -${getIdxHint(i)} string passed, but could not resolve HTMLElement for element id ${inputArgArray[i]}`)
+      }
+      throw new Error(`toNetInput -${getIdxHint(i)} expected media to be of type HTMLImageElement | HTMLVideoElement | HTMLCanvasElement | tf.Tensor3D, or to be an element id`)
+    }
+  })

   // wait for all media elements being loaded
   await Promise.all(
-    medias.map(media => awaitMediaLoaded(media))
+    inputArray.map(input => isMediaElement(input) && awaitMediaLoaded(input))
   )

-  return new NetInput(medias)
+  return afterCreate(new NetInput(inputArray))
 }
\ No newline at end of file
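toNetInput now resolves a wider range of inputs than element ids and media elements. A sketch (all variables hypothetical):

await toNetInput('myImgId')                // element id, resolved via resolveInput
await toNetInput(imgEl)                    // HTMLImageElement / HTMLVideoElement / HTMLCanvasElement
await toNetInput(tf.fromPixels(canvasEl))  // tf.Tensor3D
await toNetInput(batchTensor4d)            // tf.Tensor4D, unstacked into single images
await toNetInput([imgEl, tensor3d], true)  // mixed array, returned as a managed NetInput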
+import * as tf from '@tensorflow/tfjs-core';
+
+import { NetInput } from './NetInput';
+
 export type TMediaElement = HTMLImageElement | HTMLVideoElement | HTMLCanvasElement

-export type TNetInputArg = string | TMediaElement
+export type TResolvedNetInput = TMediaElement | tf.Tensor3D | tf.Tensor4D
+
+export type TNetInputArg = string | TResolvedNetInput

-export type TNetInput = TNetInputArg | Array<TNetInputArg>
+export type TNetInput = TNetInputArg | Array<TNetInputArg> | NetInput | tf.Tensor4D

 export type Dimensions = {
   width: number
...
 import * as tf from '@tensorflow/tfjs-core';

+import { isTensor4D } from './commons/isTensor';
 import { Dimensions } from './types';

 export function isFloat(num: number) {
@@ -14,7 +15,7 @@ export function round(num: number) {
   return Math.floor(num * 100) / 100
 }

-export function getElement(arg: string | any) {
+export function resolveInput(arg: string | any) {
   if (typeof arg === 'string') {
     return document.getElementById(arg)
   }
@@ -106,12 +107,12 @@ export function bufferToImage(buf: Blob): Promise<HTMLImageElement> {
 }

 export async function imageTensorToCanvas(
-  imgTensor: tf.Tensor4D,
+  imgTensor: tf.Tensor,
   canvas?: HTMLCanvasElement
 ): Promise<HTMLCanvasElement> {
   const targetCanvas = canvas || document.createElement('canvas')

-  const [_, height, width, numChannels] = imgTensor.shape
+  const [height, width, numChannels] = imgTensor.shape.slice(isTensor4D(imgTensor) ? 1 : 0)
   await tf.toPixels(imgTensor.as3D(height, width, numChannels).toInt(), targetCanvas)

   return targetCanvas
...