Commit ca47cbbd by vincent

implemented YOLO loss functions + mean subtraction for the retrained model

parent 542dc68c
......@@ -3,7 +3,7 @@ import * as tf from '@tensorflow/tfjs-core';
import { isTensor3D, isTensor4D } from './commons/isTensor';
import { padToSquare } from './padToSquare';
import { Point } from './Point';
import { TResolvedNetInput } from './types';
import { TResolvedNetInput, Dimensions } from './types';
import { createCanvasFromMedia } from './utils';
export class NetInput {
......@@ -14,6 +14,7 @@ export class NetInput {
private _inputDimensions: number[][] = []
private _paddings: Point[] = []
private _inputSize: number = 0
constructor(
inputs: tf.Tensor4D | Array<TResolvedNetInput>,
......@@ -81,6 +82,10 @@ export class NetInput {
return this._paddings
}
public get inputSize(): number {
return this._inputSize
}
public getInputDimensions(batchIdx: number): number[] {
return this._inputDimensions[batchIdx]
}
......@@ -97,8 +102,26 @@ export class NetInput {
return this._paddings[batchIdx]
}
public getRelativePaddings(batchIdx: number): Point {
return new Point(
(this.getPaddings(batchIdx).x + this.getInputWidth(batchIdx)) / this.getInputWidth(batchIdx),
(this.getPaddings(batchIdx).y + this.getInputHeight(batchIdx)) / this.getInputHeight(batchIdx)
)
}
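// Example (assumption, not part of the diff): a 300 x 400 input padded to a
// 400 x 400 square has paddings = { x: 100, y: 0 }, so the relative paddings
// are { x: (100 + 300) / 300 ≈ 1.33, y: (0 + 400) / 400 = 1.0 }, which is the
// factor detection boxes are scaled by to map them back onto the unpadded image.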
public getReshapedInputDimensions(batchIdx: number): Dimensions {
const [h, w] = [this.getInputHeight(batchIdx), this.getInputWidth(batchIdx)]
const f = this.inputSize / Math.max(h, w)
return {
height: Math.floor(h * f),
width: Math.floor(w * f)
}
}
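// Worked example (assumption, not part of the diff): for a 1280 x 720 input and
// inputSize = 416, f = 416 / 1280 = 0.325, so the reshaped dimensions are
// { height: Math.floor(720 * 0.325) = 234, width: Math.floor(1280 * 0.325) = 416 }.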
public toBatchTensor(inputSize: number, isCenterInputs: boolean = true): tf.Tensor4D {
this._inputSize = inputSize
return tf.tidy(() => {
const inputTensors = this._inputs.map((inputTensor: tf.Tensor3D) => {
......
......@@ -2,7 +2,6 @@ export function extractWeightsFactory(weights: Float32Array) {
let remainingWeights = weights
function extractWeights(numWeights: number): Float32Array {
console.log(numWeights)
const ret = remainingWeights.slice(0, numWeights)
remainingWeights = remainingWeights.slice(numWeights)
return ret
......
import * as tf from '@tensorflow/tfjs-core';
export function normalize(x: tf.Tensor4D, meanRgb: number[]): tf.Tensor4D {
return tf.tidy(() => {
const [r, g, b] = meanRgb
const avg_r = tf.fill([...x.shape.slice(0, 3), 1], r)
const avg_g = tf.fill([...x.shape.slice(0, 3), 1], g)
const avg_b = tf.fill([...x.shape.slice(0, 3), 1], b)
const avg_rgb = tf.concat([avg_r, avg_g, avg_b], 3)
return tf.sub(x, avg_rgb)
})
}
\ No newline at end of file
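A minimal usage sketch of the shared normalize helper (not part of the diff; the mean RGB values are the ones the face recognition net uses below, the input tensor is made up):
import * as tf from '@tensorflow/tfjs-core';
import { normalize } from './commons/normalize';
// hypothetical 2 x 150 x 150 x 3 input batch
const batch = tf.zeros([2, 150, 150, 3]) as tf.Tensor4D
// subtract the per-channel means, then scale down as the nets in this commit do
const normalized = normalize(batch, [122.782, 117.001, 104.298]).div(tf.scalar(256)) as tf.Tensor4D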
......@@ -81,19 +81,18 @@ export class FaceDetectionNet extends NeuralNetwork<NetParams> {
minConfidence
)
const paddedHeightRelative = (netInput.getPaddings(0).y + netInput.getInputHeight(0)) / netInput.getInputHeight(0)
const paddedWidthRelative = (netInput.getPaddings(0).x + netInput.getInputWidth(0)) / netInput.getInputWidth(0)
const paddings = netInput.getRelativePaddings(0)
const results = indices
.map(idx => {
const [top, bottom] = [
Math.max(0, boxes.get(idx, 0)),
Math.min(1.0, boxes.get(idx, 2))
].map(val => val * paddedHeightRelative)
].map(val => val * paddings.y)
const [left, right] = [
Math.max(0, boxes.get(idx, 1)),
Math.min(1.0, boxes.get(idx, 3))
].map(val => val * paddedWidthRelative)
].map(val => val * paddings.x)
return new FaceDetection(
scoresData[idx],
new Rect(
......
import * as tf from '@tensorflow/tfjs-core';
import { NeuralNetwork } from '../commons/NeuralNetwork';
import { normalize } from '../commons/normalize';
import { NetInput } from '../NetInput';
import { toNetInput } from '../toNetInput';
import { TNetInput } from '../types';
import { convDown } from './convLayer';
import { extractParams } from './extractParams';
import { loadQuantizedParams } from './loadQuantizedParams';
import { normalize } from './normalize';
import { residual, residualDown } from './residualLayer';
import { NetParams } from './types';
......@@ -28,7 +28,8 @@ export class FaceRecognitionNet extends NeuralNetwork<NetParams> {
return tf.tidy(() => {
const batchTensor = input.toBatchTensor(150, true)
const normalized = normalize(batchTensor)
const meanRgb = [122.782, 117.001, 104.298]
const normalized = normalize(batchTensor, meanRgb).div(tf.scalar(256)) as tf.Tensor4D
let out = convDown(normalized, params.conv32_down)
out = tf.maxPool(out, 3, 2, 'valid')
......
import * as tf from '@tensorflow/tfjs-core';
export function normalize(x: tf.Tensor4D): tf.Tensor4D {
return tf.tidy(() => {
const avg_r = tf.fill([...x.shape.slice(0, 3), 1], 122.782);
const avg_g = tf.fill([...x.shape.slice(0, 3), 1], 117.001);
const avg_b = tf.fill([...x.shape.slice(0, 3), 1], 104.298);
const avg_rgb = tf.concat([avg_r, avg_g, avg_b], 3)
return tf.div(tf.sub(x, avg_rgb), tf.scalar(256))
})
}
\ No newline at end of file
......@@ -5,6 +5,7 @@ export {
}
export * from './BoundingBox';
export * from './FaceDetection';
export * from './FullFaceDescription';
export * from './NetInput';
......@@ -24,4 +25,6 @@ export * from './mtcnn';
export * from './padToSquare';
export * from './tinyYolov2';
export * from './toNetInput';
export * from './utils'
\ No newline at end of file
export * from './utils';
export * from './types';
\ No newline at end of file
......@@ -4,13 +4,14 @@ import { BoundingBox } from '../BoundingBox';
import { convLayer } from '../commons/convLayer';
import { NeuralNetwork } from '../commons/NeuralNetwork';
import { nonMaxSuppression } from '../commons/nonMaxSuppression';
import { normalize } from '../commons/normalize';
import { FaceDetection } from '../FaceDetection';
import { NetInput } from '../NetInput';
import { Point } from '../Point';
import { toNetInput } from '../toNetInput';
import { TNetInput } from '../types';
import { sigmoid } from '../utils';
import { BOX_ANCHORS, BOX_ANCHORS_SEPARABLE, INPUT_SIZES, IOU_THRESHOLD, NUM_BOXES } from './config';
import { BOX_ANCHORS, BOX_ANCHORS_SEPARABLE, INPUT_SIZES, IOU_THRESHOLD, MEAN_RGB, NUM_BOXES } from './config';
import { convWithBatchNorm } from './convWithBatchNorm';
import { extractParams } from './extractParams';
import { getDefaultParams } from './getDefaultParams';
......@@ -45,7 +46,12 @@ export class TinyYolov2 extends NeuralNetwork<NetParams> {
}
const out = tf.tidy(() => {
const batchTensor = input.toBatchTensor(inputSize, false).div(tf.scalar(255)) as tf.Tensor4D
let batchTensor = input.toBatchTensor(inputSize, false)
batchTensor = this.hasSeparableConvs
? normalize(batchTensor, MEAN_RGB)
: batchTensor
batchTensor = batchTensor.div(tf.scalar(256)) as tf.Tensor4D
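// note: only the retrained separable-conv model expects mean-subtracted (MEAN_RGB)
// input, the original weights only expect the input to be scaled down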
let out = convWithBatchNorm(batchTensor, params.conv0)
out = tf.maxPool(out, [2, 2], [2, 2], 'same')
......@@ -87,22 +93,23 @@ export class TinyYolov2 extends NeuralNetwork<NetParams> {
const netInput = await toNetInput(input, true)
const out = await this.forwardInput(netInput, inputSize)
const out0 = tf.tidy(() => tf.unstack(out)[0].expandDims()) as tf.Tensor4D
console.log(out0.shape)
const inputDimensions = {
width: netInput.getInputWidth(0),
height: netInput.getInputHeight(0)
}
const paddings = new Point(
(netInput.getPaddings(0).x + netInput.getInputWidth(0)) / netInput.getInputWidth(0),
(netInput.getPaddings(0).y + netInput.getInputHeight(0)) / netInput.getInputHeight(0)
)
const paddings = netInput.getRelativePaddings(0)
const results = this.postProcess(out, { scoreThreshold, paddings })
const results = this.postProcess(out0, { scoreThreshold, paddings })
const boxes = results.map(res => res.box)
const scores = results.map(res => res.score)
out.dispose()
out0.dispose()
const indices = nonMaxSuppression(
boxes.map(box => box.rescale(inputSize)),
......
......@@ -19,3 +19,5 @@ export const BOX_ANCHORS_SEPARABLE = [
new Point(4.266906, 5.178857),
new Point(9.041765, 10.66308)
]
export const MEAN_RGB = [117.001, 114.697, 97.404]
\ No newline at end of file
......@@ -8,7 +8,7 @@ export function convWithBatchNorm(x: tf.Tensor4D, params: ConvWithBatchNorm | Se
let out = tf.pad(x, [[0, 0], [1, 1], [1, 1], [0, 0]]) as tf.Tensor4D
if (params instanceof SeparableConvParams) {
out = tf.separableConv2d(out, params.depthwise_filter, params.pointwise_filter, [1, 1], 'same')
out = tf.separableConv2d(out, params.depthwise_filter, params.pointwise_filter, [1, 1], 'valid')
out = tf.add(out, params.bias)
} else {
out = tf.conv2d(out, params.conv.filters, [1, 1], 'valid')
......
......@@ -31,11 +31,10 @@ function extractorsFactory(extractWeights: ExtractWeightsFunction, paramMappings
}
function extractSeparableConvParams(channelsIn: number, channelsOut: number, mappedPrefix: string): SeparableConvParams {
console.log(mappedPrefix)
const depthwise_filter = tf.tensor4d(extractWeights(3 * 3 * channelsIn), [3, 3, channelsIn, 1])
const pointwise_filter = tf.tensor4d(extractWeights(channelsIn * channelsOut), [1, 1, channelsIn, channelsOut])
const bias = tf.tensor1d(extractWeights(channelsOut))
console.log('done')
paramMappings.push(
{ paramPath: `${mappedPrefix}/depthwise_filter` },
{ paramPath: `${mappedPrefix}/pointwise_filter` },
......
......@@ -105,15 +105,3 @@ async function getTrainData() {
return batch1.concat(batch2)
}
\ No newline at end of file
// https://stackoverflow.com/questions/6274339/how-can-i-shuffle-an-array
function shuffle(a) {
var j, x, i;
for (i = a.length - 1; i > 0; i--) {
j = Math.floor(Math.random() * (i + 1));
x = a[i];
a[i] = a[j];
a[j] = x;
}
return a;
}
\ No newline at end of file
......@@ -10,6 +10,7 @@
<script src="faceLandmarksUi.js"></script>
<script src="faceLandmarksTrain.js"></script>
<script src="FileSaver.js"></script>
<script src="trainUtils.js"></script>
</head>
<body>
<div id="navbar"></div>
......
// https://stackoverflow.com/questions/6274339/how-can-i-shuffle-an-array
function shuffle(a) {
var j, x, i;
for (i = a.length - 1; i > 0; i--) {
j = Math.floor(Math.random() * (i + 1));
x = a[i];
a[i] = a[j];
a[j] = x;
}
return a;
}
\ No newline at end of file
require('./.env')
require('./faceLandmarks/.env')
const express = require('express')
const path = require('path')
const app = express()
const viewsDir = path.join(__dirname, 'views')
app.use(express.static(viewsDir))
app.use(express.static(path.join(__dirname, './public')))
app.use(express.static(path.join(__dirname, './tmp')))
const publicDir = path.join(__dirname, './faceLandmarks')
app.use(express.static(publicDir))
app.use(express.static(path.join(__dirname, './node_modules/file-saver')))
app.use(express.static(path.join(__dirname, '../../examples/public')))
app.use(express.static(path.join(__dirname, '../../weights')))
......@@ -18,6 +16,6 @@ const trainDataPath = path.resolve(process.env.TRAIN_DATA_PATH)
app.use(express.static(trainDataPath))
app.get('/', (req, res) => res.redirect('/face_landmarks'))
app.get('/face_landmarks', (req, res) => res.sendFile(path.join(viewsDir, 'faceLandmarks.html')))
app.get('/face_landmarks', (req, res) => res.sendFile(path.join(publicDir, 'train.html')))
app.listen(3000, () => console.log('Listening on port 3000!'))
\ No newline at end of file
require('./tinyYolov2/.env')
const express = require('express')
const path = require('path')
const app = express()
const publicDir = path.join(__dirname, './tinyYolov2')
app.use(express.static(publicDir))
app.use(express.static(path.join(__dirname, './node_modules/file-saver')))
app.use(express.static(path.join(__dirname, '../../examples/public')))
app.use(express.static(path.join(__dirname, '../../weights')))
app.use(express.static(path.join(__dirname, '../../dist')))
const trainDataPath = path.resolve(process.env.TRAIN_DATA_PATH)
app.use(express.static(trainDataPath))
//app.get('/', (req, res) => res.sendFile(path.join(publicDir, 'train.html')))
//app.get('/', (req, res) => res.sendFile(path.join(publicDir, 'tinyYolov2FaceDetectionVideo.html')))
app.get('/', (req, res) => res.sendFile(path.join(publicDir, 'testLoss.html')))
app.listen(3000, () => console.log('Listening on port 3000!'))
\ No newline at end of file
<!DOCTYPE html>
<html>
<head>
<script src="https://cdn.jsdelivr.net/npm/@tensorflow/tfjs@0.12.0"> </script>
<script src="FileSaver.js"></script>
</head>
<body>
<script>
const glorotNormal = tf.initializers.glorotNormal()
saveWeights()
function initSeparableConvWeights(inChannels, outChannels) {
return {
depthwiseFilter: glorotNormal.apply([3, 3, inChannels, 1]),
pointwiseFilter: glorotNormal.apply([1, 1, inChannels, outChannels]),
bias: tf.zeros([outChannels])
}
}
function initTinyYolov2SeparableWeights() {
const conv0 = initSeparableConvWeights(3, 16)
const conv1 = initSeparableConvWeights(16, 32)
const conv2 = initSeparableConvWeights(32, 64)
const conv3 = initSeparableConvWeights(64, 128)
const conv4 = initSeparableConvWeights(128, 256)
const conv5 = initSeparableConvWeights(256, 512)
const conv6 = initSeparableConvWeights(512, 1024)
const conv7 = initSeparableConvWeights(1024, 1024)
const conv8 = {
filters: glorotNormal.apply([1, 1, 1024, 25]),
bias: tf.zeros([25])
}
return {
conv0,
conv1,
conv2,
conv3,
conv4,
conv5,
conv6,
conv7,
conv8
}
}
function saveWeights() {
const w = initTinyYolov2SeparableWeights()
const binaryWeights = new Float32Array(
Array(8).fill(0)
.map((_, i) => w[`conv${i}`])
.map(ps => [ps.depthwiseFilter, ps.pointwiseFilter, ps.bias])
.reduce((flat, arr) => flat.concat(arr))
.concat([w.conv8.filters, w.conv8.bias])
.map(tensor => console.log(tensor) || Array.from(tensor.dataSync()))
.reduce((flat, arr) => flat.concat(arr))
)
saveAs(new Blob([binaryWeights]), 'foo.weights')
}
</script>
</body>
</html>
\ No newline at end of file
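For reference (a sketch of an assumption, not code from the diff): the float count written per separable conv block matches what extractSeparableConvParams reads back, in the same depthwise / pointwise / bias order:
// rough sketch: number of floats one separable conv block contributes
function numSeparableConvWeights(channelsIn, channelsOut) {
  return 3 * 3 * channelsIn      // depthwise filter
    + channelsIn * channelsOut   // pointwise filter
    + channelsOut                // bias
}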
// hyper parameters
const objectScale = 1
const noObjectScale = 0.5
const coordScale = 5
const squared = e => Math.pow(e, 2)
const isSameAnchor = (p1, p2) =>
p1.row === p2.row
&& p1.col === p2.col
&& p1.anchor === p2.anchor
const sum = vals => vals.reduce((sum, val) => sum + val, 0)
function computeNoObjectLoss(negative) {
return squared(0 - negative.score)
}
function computeObjectLoss({ groundTruth, pred }) {
return squared(
faceapi.iou(
groundTruth.box,
pred.box
)
- pred.score
)
}
function computeCoordLoss({ groundTruth, pred }, imgDims) {
const anchor = window.net.anchors[groundTruth.anchor]
const getWidthCorrections = box => Math.log((box.width / imgDims.width) / anchor.x)
const getHeightCorrections = box => Math.log((box.height / imgDims.height) / anchor.y)
return squared((groundTruth.box.left - pred.box.left) / imgDims.width)
+ squared((groundTruth.box.top - pred.box.top) / imgDims.height)
+ squared(getWidthCorrections(groundTruth.box) - getWidthCorrections(pred.box))
+ squared(getHeightCorrections(groundTruth.box) - getHeightCorrections(pred.box))
}
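// Worked example (hypothetical numbers): with an anchor of { x: 2, y: 3 },
// imgDims = { width: 416, height: 416 } and a 100 x 120 px ground truth box,
// getWidthCorrections(box) = Math.log((100 / 416) / 2) ≈ -2.12 and
// getHeightCorrections(box) = Math.log((120 / 416) / 3) ≈ -2.34; the squared
// differences of these log-scale corrections, plus the squared relative
// left/top offsets, make up the coordinate loss for one positive.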
function computeLoss(outBoxesByAnchor, groundTruth, inputSize, imgDims) {
const { anchors } = window.net
const numCells = inputSize / 32
const groundTruthByAnchor = groundTruth.map(rect => {
const x = rect.x * imgDims.width
const y = rect.y * imgDims.height
const width = rect.width * imgDims.width
const height = rect.height * imgDims.height
const row = Math.round((y / inputSize) * numCells)
const col = Math.round((x / inputSize) * numCells)
const anchorsByIou = anchors.map((a, idx) => ({
idx,
iou: faceapi.iou(
new faceapi.BoundingBox(0, 0, a.x * 32, a.y * 32),
new faceapi.BoundingBox(0, 0, width, height)
)
})).sort((a1, a2) => a2.iou - a1.iou)
console.log('anchorsByIou', anchorsByIou)
const anchor = anchorsByIou[0].idx
return {
box: new faceapi.BoundingBox(x, y, x + width, y + height),
row,
col,
anchor
}
})
console.log('outBoxesByAnchor', outBoxesByAnchor.filter(o => o.score > 0.5).map(o => o))
console.log('outBoxesByAnchor', outBoxesByAnchor.filter(o => o.score > 0.5).map(o => o.box.rescale(imgDims)))
console.log('groundTruthByAnchor', groundTruthByAnchor)
const negatives = outBoxesByAnchor.filter(pred => !groundTruthByAnchor.find(gt => isSameAnchor(gt, pred)))
const positives = outBoxesByAnchor
.map(pred => ({
groundTruth: groundTruthByAnchor.find(gt => isSameAnchor(gt, pred)),
pred: {
...pred,
box: pred.box.rescale(imgDims)
}
}))
.filter(pos => !!pos.groundTruth)
console.log('negatives', negatives)
console.log('positives', positives)
const noObjectLoss = sum(negatives.map(computeNoObjectLoss))
const objectLoss = sum(positives.map(computeObjectLoss))
const coordLoss = sum(positives.map(positive => computeCoordLoss(positive, imgDims)))
console.log('noObjectLoss', noObjectLoss)
console.log('objectLoss', objectLoss)
console.log('coordLoss', coordLoss)
return noObjectScale * noObjectLoss
+ objectScale * objectLoss
+ coordScale * coordLoss
// we don't compute a class loss, since we only have 1 class
// + class_scale * sum(class_loss)
}
\ No newline at end of file
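A quick sanity check of the loss weighting, using made-up per-term sums:
// hypothetical per-term sums for a single image
const noObjectLoss = 0.04  // low-score negatives
const objectLoss = 0.09    // (iou - score)^2 over positives
const coordLoss = 0.02     // offset and log-scale corrections
// total = 0.5 * 0.04 + 1 * 0.09 + 5 * 0.02 = 0.21
const totalLoss = noObjectScale * noObjectLoss + objectScale * objectLoss + coordScale * coordLoss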
<!DOCTYPE html>
<html>
<head>
<link rel="stylesheet" href="styles.css">
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/materialize/0.100.2/css/materialize.css">
<script type="text/javascript" src="https://code.jquery.com/jquery-2.1.1.min.js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/materialize/0.100.2/js/materialize.min.js"></script>
<script src="face-api.js"></script>
<script src="commons.js"></script>
<script src="FileSaver.js"></script>
<script src="trainUtils.js"></script>
</head>
<body>
<script>
tf = faceapi.tf
const weightsUrl = '/tmp/initial_tiny_yolov2_glorot_normal.weights'
window.saveEveryNthIteration = 2
window.trainSteps = 100
window.optimizer = tf.train.adam(0.001, 0.9, 0.999, 1e-8)
function lossFunction(labels, out) {
return tf.losses.meanSquaredError(labels, out)
}
async function loadNetWeights(uri) {
return new Float32Array(await (await fetch(uri)).arrayBuffer())
}
async function getTrainData() {
// TBD
}
async function run() {
const weights = await loadNetWeights(weightsUrl)
window.net = new faceapi.TinyYolov2(true)
window.net.load(weights)
window.trainData = await getTrainData()
window.net.variable()
}
/*
const outTensor = await window.net.forward(netInput, 608)
const detections = await window.net.locateFaces(netInput, forwardParams)
const outBoxesByAnchor = window.net.postProcess(
outTensor,
{
scoreThreshold: 0,
paddings: netInput.getRelativePaddings(0)
}
)
const groundTruth = detections.map(det => det.forSize(1, 1).box)
console.log(computeLoss(
outBoxesByAnchor,
groundTruth,
netInput.inputSize,
netInput.getReshapedInputDimensions(0)
))
*/
async function train(batchSize = 1) {
for (let i = 0; i < trainSteps; i++) {
console.log('step', i)
const batchCreators = createBatchCreators(shuffle(window.trainData), batchSize)
let ts = Date.now()
await trainStep(batchCreators)
ts = Date.now() - ts
console.log('step %s done (%s ms)', i, ts)
if (((i + 1) % saveEveryNthIteration) === 0) {
saveWeights(i)
}
}
}
run()
</script>
</body>
</html>
\ No newline at end of file