Commit b4f4729f by vincent

tools for weight quantization

parent e95dd35d
{
  "scripts": {
    "start": "node server.js"
  },
  "author": "justadudewhohacks",
  "license": "MIT",
  "dependencies": {
    "express": "^4.16.3",
    "file-saver": "^1.3.8"
  }
}
// computes the scale and the nudged [min, max] range for the given quantization
// dtype (qdtype 0 -> uint8, qdtype 1 -> uint16), such that the real value 0 maps
// exactly onto an integer quantization level
function getQuantizationRange(min, max, qdtype) {
  if (qdtype !== 0 && qdtype !== 1) {
    throw new Error('getQuantizationRange - expected qdtype to be 0 (uint8) or 1 (uint16), have: ' + qdtype)
  }

  const quantMax = qdtype === 0 ? 255 : 65535
  const scale = (max - min) / quantMax

  let result = { scale, min, max }

  // if 0 lies within [min, max], shift the range so that 0 is representable
  // without rounding error
  if (min <= 0 && 0 <= max) {
    const quantizedZeroPoint = (0 - min) / scale
    const nudgedZeroPoint = Math.round(quantizedZeroPoint)
    result.min = (-nudgedZeroPoint) * scale
    result.max = quantMax * scale + result.min
  }

  return result
}
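
// Worked example (illustrative numbers, not part of the original commit):
// getQuantizationRange(-0.3, 1.0, 0) gives scale = 1.3 / 255 ≈ 0.005098 and
// quantizedZeroPoint = 0.3 / 0.005098 ≈ 58.85, which is nudged to 59, shifting
// the range to min ≈ -0.300784 and max ≈ 0.999216 so that the real value 0
// maps exactly onto quantization level 59.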
// quantizes the values of a float tensor to uint8 (qdtype 0, default) or uint16
// (qdtype 1) and returns the quantized data together with the scale and min
// required to dequantize the weights again
function quantizeWeights(tensor, qdtype = 0) {
  const min = tensor.min().dataSync()[0]
  const max = tensor.max().dataSync()[0]

  // constant tensor: every value quantizes to 0 and dequantizes back to min
  if (min === max) {
    return {
      scale: 1.0,
      min,
      qdata: qdtype === 0 ? new Uint8Array(tensor.size) : new Uint16Array(tensor.size)
    }
  }

  const q = getQuantizationRange(min, max, qdtype)
  const qdata = tf.round(tf.clipByValue(tensor, q.min, q.max).sub(tf.scalar(q.min)).div(tf.scalar(q.scale))).dataSync()

  return {
    scale: q.scale,
    min: q.min,
    qdata: qdtype === 0 ? new Uint8Array(qdata) : new Uint16Array(qdata)
  }
}
// restores float weights from quantized data, either from the separate arguments
// or from a single { qdata, scale, min } object as returned by quantizeWeights
function dequantizeWeights(qdata, scale, min) {
  // check explicitly for undefined, since min may legitimately be 0 and would
  // otherwise fail a truthiness check
  if (qdata.qdata !== undefined && qdata.scale !== undefined && qdata.min !== undefined) {
    return Float32Array.from(qdata.qdata, v => (v * qdata.scale) + qdata.min)
  }
  return Float32Array.from(qdata, v => (v * scale) + min)
}
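
// Minimal round-trip sketch (illustrative, not part of the original commit;
// assumes tf is the TensorFlow.js namespace exposed on the page via tf = faceapi.tf):
//
//   const weights = tf.tensor1d([-0.3, 0.0, 0.42, 1.0])
//   const { qdata, scale, min } = quantizeWeights(weights)  // uint8 by default
//   const restored = dequantizeWeights(qdata, scale, min)   // Float32Array
//
// Each restored value differs from its original by at most roughly scale / 2.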
const express = require('express')
const path = require('path')
const app = express()
const viewsDir = path.join(__dirname, 'views')
app.use(express.static(viewsDir))
app.use(express.static(path.join(__dirname, './public')))
app.use(express.static(path.join(__dirname, './node_modules/file-saver')))
app.use(express.static(path.join(__dirname, '../../examples/public')))
app.use(express.static(path.join(__dirname, '../../weights')))
app.use(express.static(path.join(__dirname, '../../dist')))
app.get('/', (req, res) => res.redirect('/quantize_tiny_yolov2'))
app.get('/quantize_tiny_yolov2', (req, res) => res.sendFile(path.join(viewsDir, 'quantizeTinyYolov2.html')))
app.listen(3000, () => console.log('Listening on port 3000!'))
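
// Usage note (not part of the original commit): `npm start` (see package.json)
// runs this server with node and serves the quantization page at
// http://localhost:3000, which redirects to /quantize_tiny_yolov2.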
<!DOCTYPE html>
<html>
<head>
  <script src="face-api.js"></script>
  <script src="commons.js"></script>
  <script src="FileSaver.js"></script>
  <script src="quantization.js"></script>
</head>
<body>
  <script>
    tf = faceapi.tf
    // fetches the uncompressed (float32) weight file and returns it as a Float32Array
    async function loadNetWeights(uri) {
      return new Float32Array(await (await fetch(uri)).arrayBuffer())
    }
    // splits the flat array of quantized weight values into shards of at most
    // 4096 * 1024 bytes each
    function makeShards(weightArray) {
      const maxLength = 4096 * 1024
      const shards = []
      let shardIdx = 0
      for (let i = 0; i < weightArray.length; i++) {
        if (!shards[shardIdx]) {
          shards[shardIdx] = []
        }
        shards[shardIdx].push(weightArray[i])
        // start a new shard once the current one is full
        if (shards[shardIdx].length >= maxLength) {
          shardIdx += 1
        }
      }
      return shards.map(shardArray => new Uint8Array(shardArray))
    }
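
    // Illustrative sizing example (not part of the original commit): with
    // maxLength = 4096 * 1024 = 4194304, a quantized array of 9000000 uint8
    // values is split into three shards of 4194304, 4194304 and 611392 bytes:
    //   makeShards(new Uint8Array(9000000)).map(s => s.length)  // [4194304, 4194304, 611392]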
    // loads the uncompressed tiny yolov2 weights, quantizes every parameter tensor
    // to uint8 and saves the resulting weight shards together with the weight manifest
    async function quantizeAndSave() {
      const modelName = 'tiny_yolov2'

      const tinyYolov2 = new faceapi.TinyYolov2()
      await tinyYolov2.load(await loadNetWeights(`uncompressed/tiny_yolov2_model.weights`))

      const quantizedTensorArrays = []
      const weightEntries = []

      tinyYolov2.getParamList().forEach(({ path, tensor }) => {
        const { scale, min, qdata } = quantizeWeights(tensor)

        // manifest entry describing how to restore this tensor from the quantized data
        const weightEntry = {
          name: path,
          shape: tensor.shape,
          dtype: tensor.dtype,
          quantization: { dtype: 'uint8', scale, min }
        }

        console.log({ scale, min })

        quantizedTensorArrays.push(qdata)
        weightEntries.push(weightEntry)
      })

      // flatten the quantized data of all tensors into a single array and shard it
      const quantizedWeights = quantizedTensorArrays
        .map(typedArray => Array.from(typedArray))
        .reduce((flat, arr) => flat.concat(arr))

      const shards = makeShards(quantizedWeights)
      console.log('num shards: ', shards.length)

      const paths = []
      shards.forEach((shardData, i) => {
        const shardName = `${modelName}_model-shard${i + 1}`
        paths.push(shardName)
        saveAs(new Blob([shardData]), shardName)
      })

      const weightManifest = [{
        weights: weightEntries,
        paths
      }]

      saveAs(new Blob([JSON.stringify(weightManifest)]), `${modelName}_model-weights_manifest.json`)
    }
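
    // Usage note (not part of the original commit): open
    // http://localhost:3000/quantize_tiny_yolov2 served by server.js and call
    // quantizeAndSave() from the browser console; FileSaver's saveAs() then
    // triggers one download per shard plus one for the manifest file.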
  </script>
</body>
</html>