Commit b4f4729f by vincent

tools for weight quantization

parent e95dd35d
{
  "scripts": {
    "start": "node server.js"
  },
  "author": "justadudewhohacks",
  "license": "MIT",
  "dependencies": {
    "express": "^4.16.3",
    "file-saver": "^1.3.8"
  }
}
// computes the scale and the nudged [min, max] range for the given quantization
// dtype (qdtype 0 -> uint8, qdtype 1 -> uint16), such that the real value 0 maps
// exactly onto an integer quantization level
function getQuantizationRange(min, max, qdtype) {
  if (qdtype !== 0 && qdtype !== 1) {
    throw new Error('getQuantizationRange - expected qdtype to be 0 (uint8) or 1 (uint16), have: ' + qdtype)
  }

  const quantMax = qdtype === 0 ? 255 : 65535
  const scale = (max - min) / quantMax

  let result = { scale, min, max }

  // if 0 lies within [min, max], shift the range so that 0 is representable
  // without rounding error
  if (min <= 0 && 0 <= max) {
    const quantizedZeroPoint = (0 - min) / scale
    const nudgedZeroPoint = Math.round(quantizedZeroPoint)
    result.min = (-nudgedZeroPoint) * scale
    result.max = quantMax * scale + result.min
  }

  return result
}
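
// Worked example (illustrative numbers, not part of the original commit):
// getQuantizationRange(-0.3, 1.0, 0) gives scale = 1.3 / 255 ≈ 0.005098 and
// quantizedZeroPoint = 0.3 / 0.005098 ≈ 58.85, which is nudged to 59, shifting
// the range to min ≈ -0.300784 and max ≈ 0.999216 so that the real value 0
// maps exactly onto quantization level 59.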
// quantizes the values of a float tensor to uint8 (qdtype 0, default) or uint16
// (qdtype 1) and returns the quantized data together with the scale and min
// required to dequantize the weights again
function quantizeWeights(tensor, qdtype = 0) {
  const min = tensor.min().dataSync()[0]
  const max = tensor.max().dataSync()[0]

  // constant tensor: every value quantizes to 0 and dequantizes back to min
  if (min === max) {
    return {
      scale: 1.0,
      min,
      qdata: qdtype === 0 ? new Uint8Array(tensor.size) : new Uint16Array(tensor.size)
    }
  }

  const q = getQuantizationRange(min, max, qdtype)
  const qdata = tf.round(tf.clipByValue(tensor, q.min, q.max).sub(tf.scalar(q.min)).div(tf.scalar(q.scale))).dataSync()

  return {
    scale: q.scale,
    min: q.min,
    qdata: qdtype === 0 ? new Uint8Array(qdata) : new Uint16Array(qdata)
  }
}
// restores float weights from quantized data, either from the separate arguments
// or from a single { qdata, scale, min } object as returned by quantizeWeights
function dequantizeWeights(qdata, scale, min) {
  // check explicitly for undefined, since min may legitimately be 0 and would
  // otherwise fail a truthiness check
  if (qdata.qdata !== undefined && qdata.scale !== undefined && qdata.min !== undefined) {
    return Float32Array.from(qdata.qdata, v => (v * qdata.scale) + qdata.min)
  }
  return Float32Array.from(qdata, v => (v * scale) + min)
}
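
// Minimal round-trip sketch (illustrative, not part of the original commit;
// assumes tf is the TensorFlow.js namespace exposed on the page via tf = faceapi.tf):
//
//   const weights = tf.tensor1d([-0.3, 0.0, 0.42, 1.0])
//   const { qdata, scale, min } = quantizeWeights(weights)  // uint8 by default
//   const restored = dequantizeWeights(qdata, scale, min)   // Float32Array
//
// Each restored value differs from its original by at most roughly scale / 2.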
const express = require('express')
const path = require('path')
const app = express()
const viewsDir = path.join(__dirname, 'views')
app.use(express.static(viewsDir))
app.use(express.static(path.join(__dirname, './public')))
app.use(express.static(path.join(__dirname, './node_modules/file-saver')))
app.use(express.static(path.join(__dirname, '../../examples/public')))
app.use(express.static(path.join(__dirname, '../../weights')))
app.use(express.static(path.join(__dirname, '../../dist')))
app.get('/', (req, res) => res.redirect('/quantize_tiny_yolov2'))
app.get('/quantize_tiny_yolov2', (req, res) => res.sendFile(path.join(viewsDir, 'quantizeTinyYolov2.html')))
app.listen(3000, () => console.log('Listening on port 3000!'))
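
// Usage note (not part of the original commit): `npm start` (see package.json)
// runs this server with node and serves the quantization page at
// http://localhost:3000, which redirects to /quantize_tiny_yolov2.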
<!DOCTYPE html>
<html>
<head>
  <script src="face-api.js"></script>
  <script src="commons.js"></script>
  <script src="FileSaver.js"></script>
  <script src="quantization.js"></script>
</head>
<body>
  <script>
    tf = faceapi.tf
    // fetches the uncompressed (float32) weight file and returns it as a Float32Array
    async function loadNetWeights(uri) {
      return new Float32Array(await (await fetch(uri)).arrayBuffer())
    }
    // splits the flat array of quantized weight values into shards of at most
    // 4096 * 1024 bytes each
    function makeShards(weightArray) {
      const maxLength = 4096 * 1024
      const shards = []
      let shardIdx = 0
      for (let i = 0; i < weightArray.length; i++) {
        if (!shards[shardIdx]) {
          shards[shardIdx] = []
        }
        shards[shardIdx].push(weightArray[i])
        // start a new shard once the current one is full
        if (shards[shardIdx].length >= maxLength) {
          shardIdx += 1
        }
      }
      return shards.map(shardArray => new Uint8Array(shardArray))
    }
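
    // Illustrative sizing example (not part of the original commit): with
    // maxLength = 4096 * 1024 = 4194304, a quantized array of 9000000 uint8
    // values is split into three shards of 4194304, 4194304 and 611392 bytes:
    //   makeShards(new Uint8Array(9000000)).map(s => s.length)  // [4194304, 4194304, 611392]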
    // loads the uncompressed tiny yolov2 weights, quantizes every parameter tensor
    // to uint8 and saves the resulting weight shards together with the weight manifest
    async function quantizeAndSave() {
      const modelName = 'tiny_yolov2'

      const tinyYolov2 = new faceapi.TinyYolov2()
      await tinyYolov2.load(await loadNetWeights(`uncompressed/tiny_yolov2_model.weights`))

      const quantizedTensorArrays = []
      const weightEntries = []

      tinyYolov2.getParamList().forEach(({ path, tensor }) => {
        const { scale, min, qdata } = quantizeWeights(tensor)

        // manifest entry describing how to restore this tensor from the quantized data
        const weightEntry = {
          name: path,
          shape: tensor.shape,
          dtype: tensor.dtype,
          quantization: { dtype: 'uint8', scale, min }
        }

        console.log({ scale, min })

        quantizedTensorArrays.push(qdata)
        weightEntries.push(weightEntry)
      })

      // flatten the quantized data of all tensors into a single array and shard it
      const quantizedWeights = quantizedTensorArrays
        .map(typedArray => Array.from(typedArray))
        .reduce((flat, arr) => flat.concat(arr))

      const shards = makeShards(quantizedWeights)
      console.log('num shards: ', shards.length)

      const paths = []
      shards.forEach((shardData, i) => {
        const shardName = `${modelName}_model-shard${i + 1}`
        paths.push(shardName)
        saveAs(new Blob([shardData]), shardName)
      })

      const weightManifest = [{
        weights: weightEntries,
        paths
      }]

      saveAs(new Blob([JSON.stringify(weightManifest)]), `${modelName}_model-weights_manifest.json`)
    }
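
    // Usage note (not part of the original commit): open
    // http://localhost:3000/quantize_tiny_yolov2 served by server.js and call
    // quantizeAndSave() from the browser console; FileSaver's saveAs() then
    // triggers one download per shard plus one for the manifest file.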
  </script>
</body>
</html>