Machine Learning in the Browser with ml5.js

1. Why ml5.js?

Simplicity: Friendly APIs that hide low-level tensor operations.
Runs in the browser: No backend required; models run client-side, using WebGL acceleration where available.
Creative coding focus: Works great with p5.js for interactive sketches, art, and education.
Privacy: Your data stays on device, ideal for demos and workshops.

2. Setup

Add ml5.js (and optionally p5.js) via CDN script tags:

<!-- p5.js for easy sketches (optional but common with ml5) -->
<script src="https://cdnjs.cloudflare.com/ajax/libs/p5.js/1.9.0/p5.min.js"></script>

<!-- ml5.js 0.12.2 (includes TensorFlow.js under the hood).
     Pinned to the 0.x line on purpose: the examples in this guide use
     ml5.poseNet and error-first (err, results) callbacks, which are 0.x APIs
     that ml5 1.x no longer provides. -->
<script src="https://unpkg.com/ml5@0.12.2/dist/ml5.min.js"></script>

<!-- Your script (loaded last so p5 and ml5 are already defined) -->
<script src="js/app.js"></script>

3. Image Classification (MobileNet)

A minimal example that classifies an image using a pre-trained MobileNet model.

<!-- crossorigin="anonymous" requests the image with CORS so its pixels can be
     read for classification; the image host must send CORS headers. -->
<img id="img" src="https://www.example.com/img/YellowLabradorLooking_new.jpg" width="200" alt="Yellow Labrador to classify" crossorigin="anonymous" />
<!-- Placeholder text, replaced by the script with the prediction or an error. -->
<pre id="result">Loading...</pre>

<script>
  // Start loading MobileNet right away; ml5 calls modelLoaded once the model
  // is ready to classify.
  const classifier = ml5.imageClassifier('MobileNet', modelLoaded);

  /**
   * Writes a status, prediction, or error line into the #result element.
   * @param {string} message - Text to display.
   */
  function showResult(message) {
    document.getElementById('result').textContent = message;
  }

  /**
   * Classifies an image element and displays the top prediction.
   * @param {HTMLImageElement} img - Image that has finished loading.
   */
  function classifyImage(img) {
    classifier.classify(img, (err, results) => {
      if (err) {
        console.error(err);
        // err may not be an Error instance, so fall back to its string form.
        showResult(`Error: ${err.message || String(err)}`);
        return;
      }
      // Guard against an empty result list instead of crashing on results[0].
      if (!results || results.length === 0) {
        showResult('No prediction returned');
        return;
      }
      const [{ label, confidence }] = results;
      showResult(`${label} (${(confidence * 100).toFixed(1)}%)`);
    });
  }

  /**
   * Model-ready callback. Classifies #img immediately if it has already
   * loaded; otherwise waits for its load (or error) event, because the model
   * can become ready before the image has finished downloading.
   */
  function modelLoaded() {
    const img = document.getElementById('img');
    const reportLoadFailure = () => showResult('Error: image failed to load');

    if (!img.complete) {
      img.addEventListener('load', () => classifyImage(img), { once: true });
      img.addEventListener('error', reportLoadFailure, { once: true });
      return;
    }
    // complete with zero natural width means the download already failed.
    if (img.naturalWidth === 0) {
      reportLoadFailure();
      return;
    }
    classifyImage(img);
  }
</script>

4. Pose Detection (PoseNet) with p5.js

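Use the webcam to detect human poses. This snippet sets up a p5.js sketch and overlays keypoints. Note that ml5.poseNet belongs to the ml5 0.x API; ml5 1.x replaces it with ml5.bodyPose, so load a 0.x build of ml5.js for this example.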

// Sketch state shared by setup(), draw() and drawKeypoints().
let video; // webcam capture created in setup()
let poseNet; // PoseNet model handle created in setup()
let poses = []; // most recent results delivered by the 'pose' event

// Creates the 640x480 canvas, starts a hidden webcam capture sized to the
// canvas, and wires PoseNet so every 'pose' event replaces `poses`.
function setup() {
  createCanvas(640, 480);

  // The capture element is hidden; frames are drawn onto the canvas instead.
  video = createCapture(VIDEO);
  video.size(width, height);
  video.hide();

  const onModelReady = () => console.log('PoseNet ready');
  const onPose = (results) => (poses = results);

  poseNet = ml5.poseNet(video, onModelReady);
  poseNet.on('pose', onPose);
}

// Paints the current webcam frame across the whole canvas, then overlays the
// detected keypoints on top of it.
function draw() {
  const [left, top] = [0, 0];
  image(video, left, top, width, height);
  drawKeypoints();
}

// Draws an 8px green, outline-free dot at every keypoint whose score is
// above 0.2, for each entry currently held in `poses`.
function drawKeypoints() {
  poses.forEach(({ pose }) => {
    pose.keypoints
      .filter((keypoint) => keypoint.score > 0.2)
      .forEach(({ position }) => {
        fill(0, 255, 0);
        noStroke();
        circle(position.x, position.y, 8);
      });
  });
}

5. Sound Classification (SpeechCommands)

Classify simple voice commands with a pre-trained model.

// Sketch state: the text drawn on the canvas (updated by gotResult) and the
// sound classifier handle (assigned in preload).
let label = 'listening...';
let classifier;

// Creates the 'SpeechCommands18w' sound classifier with a probability
// threshold of 0.85 and stores it in `classifier`.
function preload() {
  const modelName = 'SpeechCommands18w';
  const modelOptions = { probabilityThreshold: 0.85 };
  classifier = ml5.soundClassifier(modelName, modelOptions);
}

// Creates the 400x200 canvas and starts classification, handing each
// outcome to gotResult.
function setup() {
  const canvasWidth = 400;
  const canvasHeight = 200;
  createCanvas(canvasWidth, canvasHeight);

  classifier.classify(gotResult);
}

/**
 * Classification callback: logs errors, otherwise shows the first result's
 * label.
 * @param {*} err - Truthy when classification failed; logged to the console.
 * @param {Array<{label: string}>} [results] - Classification results; only
 *   the first entry is used.
 * @returns {undefined}
 */
function gotResult(err, results) {
  if (err) {
    console.error(err);
    return;
  }
  // Keep the previous label when nothing usable came back, rather than
  // throwing on results[0] for an empty or missing result list.
  const top = results && results[0];
  if (!top) {
    return;
  }
  label = top.label;
}

// Clears the canvas to light grey and draws the current label, centred
// horizontally and vertically, at 24px.
function draw() {
  background(240);

  textAlign(CENTER, CENTER);
  textSize(24);

  const centerX = width / 2;
  const centerY = height / 2;
  text(label, centerX, centerY);
}

6. Tips and Best Practices

HTTPS + Permissions: Webcam and microphone access require HTTPS (or localhost) and user permission.
Model size: Pre-trained models are downloaded at runtime; ensure a decent network connection for demos.
Performance: Close other GPU-intensive tabs; reduce video resolution if pose detection is slow.
Ethics and privacy: Always inform users when capturing audio/video and process data locally when possible.