Commit 45c9d6f8 by vincent

finalize architecture

parent 23d4664d
...@@ -15,7 +15,7 @@ function fromData(input: number[]): tf.Tensor4D { ...@@ -15,7 +15,7 @@ function fromData(input: number[]): tf.Tensor4D {
throw new Error(`invalid input size: ${dim}x${dim}x3 (array length: ${input.length})`) throw new Error(`invalid input size: ${dim}x${dim}x3 (array length: ${input.length})`)
} }
return tf.tensor4d(input as number[], [1, 580, 580, 3]) return tf.tensor4d(input as number[], [1, dim, dim, 3])
} }
function fromImageData(input: ImageData[]) { function fromImageData(input: ImageData[]) {
...@@ -31,24 +31,30 @@ function fromImageData(input: ImageData[]) { ...@@ -31,24 +31,30 @@ function fromImageData(input: ImageData[]) {
return tf.cast(tf.concat(imgTensors, 0), 'float32') return tf.cast(tf.concat(imgTensors, 0), 'float32')
} }
/**
 * Builds an image tensor from any of the supported input kinds.
 *
 * - A single `ImageData` is wrapped in a one-element array and handed to
 *   `fromImageData`.
 * - An array whose first element is an `ImageData` is handed to
 *   `fromImageData` as-is.
 * - Anything else (including an empty array, whose first element is
 *   `undefined`) is treated as raw `number[]` data and handed to `fromData`.
 *
 * The conversion runs inside `tf.tidy`, and the converter's result is what
 * this function returns.
 *
 * @param input A single image, a list of images, or a flat array of numbers.
 * @returns The tensor produced by `fromImageData` or `fromData`.
 */
function getImgTensor(input: ImageData|ImageData[]|number[]) {
  return tf.tidy(() => {
    // Single image: normalise to the list form expected by fromImageData.
    if (input instanceof ImageData) {
      return fromImageData([input])
    }

    // Only the first element is inspected to decide between images and raw data.
    if (input[0] instanceof ImageData) {
      return fromImageData(input as ImageData[])
    }

    return fromData(input as number[])
  })
}
export function faceDetectionNet(weights: Float32Array) { export function faceDetectionNet(weights: Float32Array) {
const params = extractParams(weights) const params = extractParams(weights)
async function forward(input: ImageData|ImageData[]|number[]) { function forwardTensor(imgTensor: tf.Tensor4D) {
return tf.tidy(() => { return tf.tidy(() => {
const imgDataArray = input instanceof ImageData
? [input]
: (
input[0] instanceof ImageData
? input as ImageData[]
: null
)
const imgTensor = imgDataArray !== null
? fromImageData(imgDataArray)
: fromData(input as number[])
const resized = resizeLayer(imgTensor) as tf.Tensor4D const resized = resizeLayer(imgTensor) as tf.Tensor4D
const features = mobileNetV1(resized, params.mobilenetv1_params) const features = mobileNetV1(resized, params.mobilenetv1_params)
...@@ -57,14 +63,54 @@ export function faceDetectionNet(weights: Float32Array) { ...@@ -57,14 +63,54 @@ export function faceDetectionNet(weights: Float32Array) {
classPredictions classPredictions
} = predictionLayer(features.out, features.conv11, params.prediction_layer_params) } = predictionLayer(features.out, features.conv11, params.prediction_layer_params)
const decoded = outputLayer(boxPredictions, classPredictions, params.output_layer_params) return outputLayer(boxPredictions, classPredictions, params.output_layer_params)
})
}
// TODO debug output
/**
 * Converts `input` to an image tensor via `getImgTensor` and runs it through
 * `forwardTensor`, all inside a single `tf.tidy` scope.
 *
 * @param input A single image, a list of images, or a flat array of numbers.
 * @returns Whatever `forwardTensor` returns for the converted input.
 */
function forward(input: ImageData|ImageData[]|number[]) {
  return tf.tidy(() => {
    const imgTensor = getImgTensor(input)
    return forwardTensor(imgTensor)
  })
}
return decoded async function locateFaces(
input: ImageData|ImageData[]|number[],
minConfidence: number = 0.8
) {
const imgTensor = getImgTensor(input)
const [_, height, width] = imgTensor.shape
const {
boxes: _boxes,
scores: _scores
} = forwardTensor(imgTensor)
// TODO batches
const boxes = _boxes[0]
const scores = _scores[0]
// TODO find a better way to filter by minConfidence
const data = await scores.data()
return Array.from(data)
.map((score, idx) => ({ score, idx }))
.filter(({ score }) => minConfidence < score)
.map(({ score, idx }) => ({
score,
box: {
left: Math.max(0, width * boxes.get(idx, 0)),
right: Math.min(width, width * boxes.get(idx, 1)),
top: Math.max(0, height * boxes.get(idx, 2)),
bottom: Math.min(height, height * boxes.get(idx, 3))
}
}))
})
} }
return { return {
forward forward,
locateFaces
} }
} }
\ No newline at end of file
...@@ -2,12 +2,6 @@ import * as tf from '@tensorflow/tfjs-core'; ...@@ -2,12 +2,6 @@ import * as tf from '@tensorflow/tfjs-core';
import { FaceDetectionNet } from './types'; import { FaceDetectionNet } from './types';
/**
 * Placeholder layer: currently returns `x0` unchanged and ignores `x1`.
 *
 * NOTE(review): the name suggests this was meant to perform batched
 * multi-class non-maximum suppression — no such logic exists here yet.
 */
function batchMultiClassNonMaxSuppressionLayer(x0: tf.Tensor2D, x1: tf.Tensor2D) {
// TODO: not implemented; x1 is unused and x0 is passed straight through
return x0
}
function getCenterCoordinatesAndSizesLayer(x: tf.Tensor2D) { function getCenterCoordinatesAndSizesLayer(x: tf.Tensor2D) {
const vec = tf.unstack(tf.transpose(x, [1, 0])) const vec = tf.unstack(tf.transpose(x, [1, 0]))
...@@ -27,7 +21,7 @@ function getCenterCoordinatesAndSizesLayer(x: tf.Tensor2D) { ...@@ -27,7 +21,7 @@ function getCenterCoordinatesAndSizesLayer(x: tf.Tensor2D) {
} }
} }
function decodeLayer(x0: tf.Tensor2D, x1: tf.Tensor2D) { function decodeBoxesLayer(x0: tf.Tensor2D, x1: tf.Tensor2D) {
const { const {
sizes, sizes,
centers centers
...@@ -61,15 +55,30 @@ export function outputLayer( ...@@ -61,15 +55,30 @@ export function outputLayer(
const batchSize = boxPredictions.shape[0] const batchSize = boxPredictions.shape[0]
const decoded = decodeLayer( let boxes = decodeBoxesLayer(
tf.reshape(tf.tile(params.extra_dim, [batchSize, 1, 1]), [-1, 4]) as tf.Tensor2D, tf.reshape(tf.tile(params.extra_dim, [batchSize, 1, 1]), [-1, 4]) as tf.Tensor2D,
tf.reshape(boxPredictions, [-1, 4]) as tf.Tensor2D tf.reshape(boxPredictions, [-1, 4]) as tf.Tensor2D
) )
boxes = tf.reshape(
boxes,
[batchSize, (boxes.shape[0] / batchSize), 4]
)
const scoresAndClasses = tf.sigmoid(tf.slice(classPredictions, [0, 0, 1], [-1, -1, -1]))
let scores = tf.slice(scoresAndClasses, [0, 0, 0], [-1, -1, 1]) as tf.Tensor
scores = tf.reshape(
scores,
[batchSize, scores.shape[1]]
)
const in1 = tf.sigmoid(tf.slice(classPredictions, [0, 0, 1], [-1, -1, -1])) const boxesByBatch = tf.unstack(boxes) as tf.Tensor2D[]
const in2 = tf.expandDims(tf.reshape(decoded, [batchSize, 5118, 4]), 2) const scoresByBatch = tf.unstack(scores) as tf.Tensor1D[]
return decoded return {
boxes: boxesByBatch,
scores: scoresByBatch
}
}) })
} }
\ No newline at end of file
...@@ -2,10 +2,12 @@ import { euclideanDistance } from './euclideanDistance'; ...@@ -2,10 +2,12 @@ import { euclideanDistance } from './euclideanDistance';
import { faceDetectionNet } from './faceDetectionNet'; import { faceDetectionNet } from './faceDetectionNet';
import { faceRecognitionNet } from './faceRecognitionNet'; import { faceRecognitionNet } from './faceRecognitionNet';
import { normalize } from './normalize'; import { normalize } from './normalize';
import * as tf from '@tensorflow/tfjs-core';
export { export {
euclideanDistance, euclideanDistance,
faceDetectionNet, faceDetectionNet,
faceRecognitionNet, faceRecognitionNet,
normalize normalize,
tf
} }
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment