google-gemini
diff --git a/‎Demos/PaliGemma2-on-Web/README.md‎
Lines changed: 41 additions & 0 deletions b/‎Demos/PaliGemma2-on-Web/README.md‎
Lines changed: 41 additions & 0 deletions
diff --git a/‎Demos/PaliGemma2-on-Web/assets/paligemma2-onnx-output.gif‎
12 MB b/‎Demos/PaliGemma2-on-Web/assets/paligemma2-onnx-output.gif‎
12 MB
diff --git a/‎Demos/PaliGemma2-on-Web/assets/paligemma2-onnx-pipeline.png‎
67 KB b/‎Demos/PaliGemma2-on-Web/assets/paligemma2-onnx-pipeline.png‎
67 KB
diff --git a/‎Demos/PaliGemma2-on-Web/package.json‎
Lines changed: 28 additions & 0 deletions b/‎Demos/PaliGemma2-on-Web/package.json‎
Lines changed: 28 additions & 0 deletions
diff --git a/‎Demos/PaliGemma2-on-Web/public/index.html‎
Lines changed: 41 additions & 0 deletions b/‎Demos/PaliGemma2-on-Web/public/index.html‎
Lines changed: 41 additions & 0 deletions
diff --git a/‎Demos/PaliGemma2-on-Web/public/script.js‎
Lines changed: 186 additions & 0 deletions b/‎Demos/PaliGemma2-on-Web/public/script.js‎
Lines changed: 186 additions & 0 deletions
@@ -0,0 +1,41 @@
+### Developed by [Nitin Tiwari](https://linkedin.com/in/tiwari-nitin).
+
+# Inference PaliGemma 2 in the browser with ONNX & Transformers.js
+This project demonstrates running inference with the paligemma2-3b-mix-224 model in the browser, using its converted ONNX weights and Hugging Face Transformers.js.
+
+## PaliGemma 2 to ONNX Conversion:
+![PaliGemma 2 to ONNX conversion pipeline](assets/paligemma2-onnx-pipeline.png)
+
+
+## Steps to run:
+
+1. Clone the repository on your local machine.
+2. Navigate to `gemma-cookbook/Demos/PaliGemma2-on-Web` directory.
+3. Run `npm install` to install the Node.js packages.
+4. Run `node server.js` to start the server.
+5. Open `http://localhost:3000` in your web browser and start running inference with PaliGemma 2.
+
+> [!NOTE]  
+> On the first run, loading the model weights takes around 10-15 minutes.
+
+## Results:
+![PaliGemma 2 ONNX demo output](assets/paligemma2-onnx-output.gif)
+
+
+## Resources & References
+
+1. [Google DeepMind PaliGemma 2](https://developers.googleblog.com/en/introducing-paligemma-2-mix/)
+2. Colab Notebooks: 
+<table>
+  <tr>
+    <td><b>Convert and quantize PaliGemma 2 to ONNX</b></td>
+    <td><a target="_blank" href="https://colab.research.google.com/github/NSTiwari/PaliGemma2-ONNX-Transformers.js/blob/main/Convert_PaliGemma2_to_ONNX.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a></td>
+  </tr>
+  <tr>
+    <td><b>Inference PaliGemma 2 with Transformers.js</b></td>
+    <td><a target="_blank" href="https://colab.research.google.com/github/NSTiwari/PaliGemma2-ONNX-Transformers.js/blob/main/Inference_PaliGemma2_with_Transformers_js.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a></td>
+  </tr>
+</table>
+
+3. [**Medium Blog**](https://medium.com/@tiwarinitin1999/inference-paligemma-2-with-transformers-js-5545986ac14a) for step-by-step implementation.
+4. [ONNX Community](https://huggingface.co/onnx-community)
@@ -0,0 +1,28 @@
+{
+  "name": "paligemma2-onnx-transformers.js",
+  "version": "1.0.0",
+  "main": "server.js",
+  "type": "module",
+  "scripts": {
+    "test": "echo \"Error: no test specified\" && exit 1",
+    "start": "node server.js"
+  },
+  "keywords": [],
+  "author": "Nitin Tiwari",
+  "license": "MIT",
+  "description": "Inference PaliGemma 2 on the browser using ONNX weights, and Transformers.js.",
+  "repository": {
+    "type": "git",
+    "url": "git+https://github.com/NSTiwari/PaliGemma2-ONNX-Transformers.js.git"
+  },
+  "bugs": {
+    "url": "https://github.com/NSTiwari/PaliGemma2-ONNX-Transformers.js/issues"
+  },
+  "homepage": "https://github.com/NSTiwari/PaliGemma2-ONNX-Transformers.js#readme",
+  "dependencies": {
+    "@huggingface/transformers": "^3.3.3",
+    "canvas": "^3.1.0",
+    "express": "^4.21.2",
+    "server.js": "^1.0.0"
+  }
+}
@@ -0,0 +1,41 @@
+<!DOCTYPE html>
+<html lang="en">
+
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>Inference PaliGemma 2 with 🤗 Transformers.js</title>
+    <link rel="stylesheet" href="style.css">
+</head>
+
+<body>
+    <div class="container">
+        <header>
+            <h1>Inference PaliGemma 2 with 🤗 Transformers.js</h1>
+        </header>
+
+        <section class="image-section">
+            <div class="image-container">
+                <img id="originalImage" src="" alt="Original Image" style="display: none;">
+                <canvas id="processedCanvas" width="0" height="0"></canvas>
+            </div>
+        </section>
+
+        <section class="input-section">
+            <div class="file-upload">
+                <label for="imageUpload">Choose File</label>
+                <input type="file" id="imageUpload" accept="image/*" style="display:none;" />
+            </div>
+            <input type="text" id="promptInput" placeholder="Enter your prompt (eg: detect car)" />
+            <button id="processButton" disabled>Analyze Image</button>
+        </section>
+
+        <section class="status-section">
+            <p id="responseText" style="display: none;"></p>
+        </section>
+    </div>
+
+    <script src="script.js"></script>
+</body>
+
+</html>
@@ -0,0 +1,186 @@
+document.addEventListener('DOMContentLoaded', () => { // Wire up the demo UI once the DOM has been parsed.
+    const imageUpload = document.getElementById('imageUpload'); // hidden <input type="file"> for the picture
+    const processButton = document.getElementById('processButton'); // "Analyze Image" button (disabled in the markup)
+    const originalImage = document.getElementById('originalImage'); // <img> preview of the uploaded file
+    const processedCanvas = document.getElementById('processedCanvas'); // canvas the image and bounding boxes are drawn on
+    const promptInput = document.getElementById('promptInput'); // free-text prompt field
+    const responseTextDiv = document.getElementById('responseText'); // <p> showing status text and the server's reply
+    const ctx = processedCanvas.getContext('2d'); // 2D context shared by every drawing routine below
+    // State shared between the upload handler and the click handler.
+    let imageBase64 = ''; // base64 payload of the upload (data-URL prefix stripped)
+    let originalImageURL = ''; // complete data URL of the upload
+    let originalImageObj; // Image() built from the upload; undefined until a file has been read
+
+    // Keep the <img> preview hidden until a file has loaded (index.html sets the same inline style).
+    originalImage.style.display = 'none';
+
+    // Handle image upload
+    imageUpload.addEventListener('change', (event) => {
+        const file = event.target.files[0];
+        if (file) {
+            const reader = new FileReader();
+            reader.onload = (e) => {
+                imageBase64 = e.target.result.split(',')[1];
+                originalImageURL = e.target.result;
+                originalImage.src = originalImageURL;
+
+                originalImageObj = new Image();
+                originalImageObj.onload = () => {
+                    // Keep aspect ratio while scaling
+                    const originalWidth = originalImageObj.width;
+                    const originalHeight = originalImageObj.height;
+
+                    // Maximum dimensions for the display area
+                    const maxWidth = 600;
+                    const maxHeight = 400;
+
+                    // Calculate aspect ratio
+                    const aspectRatio = originalWidth / originalHeight;
+
+                    let displayWidth = maxWidth;
+                    let displayHeight = maxWidth / aspectRatio;
+
+                    if (displayHeight > maxHeight) {
+                        displayHeight = maxHeight;
+                        displayWidth = maxHeight * aspectRatio;
+                    }
+
+                    processedCanvas.width = displayWidth;
+                    processedCanvas.height = displayHeight;
+                    ctx.clearRect(0, 0, processedCanvas.width, processedCanvas.height);
+                    ctx.drawImage(originalImageObj, 0, 0, displayWidth, displayHeight); // Draw the original image on the canvas
+
+                    // Show the original image after it's loaded for preview
+                    originalImage.style.display = 'block';
+                };
+                originalImageObj.src = originalImageURL;
+
+                // Enable the process button
+                processButton.disabled = false;
+                responseTextDiv.style.display = 'none';
+                responseTextDiv.innerHTML = '';
+                promptInput.value = '';
+            };
+            reader.readAsDataURL(file);
+        }
+    });
+
+    // Handle process button click
+    processButton.addEventListener('click', async () => {
+        if (!imageBase64) {
+            alert("Please upload an image first.");
+            return;
+        }
+
+        // Clear off previous results.
+        ctx.clearRect(0, 0, processedCanvas.width, processedCanvas.height);
+        responseTextDiv.innerHTML = 'Analyzing...';
+        responseTextDiv.style.display = 'block';
+
+
+        let prompt = promptInput.value || "";
+        if (prompt.toLowerCase().includes("detect")) {
+            const labelMatch = prompt.match(/detect\s+(.*)/i);
+            const label = labelMatch ? labelMatch[1] : 'Unknown';
+            prompt = `<image>detect ${label}`;
+        } else {
+            prompt = `<image>${prompt}`;
+        }
+
+        try {
+            const response = await fetch('/process-image', {
+                method: 'POST',
+                headers: {
+                    'Content-Type': 'application/json',
+                },
+                body: JSON.stringify({ image: imageBase64, prompt: prompt, targetWidth: processedCanvas.width, targetHeight: processedCanvas.height, originalWidth: originalImageObj.width, originalHeight: originalImageObj.height }),
+            });
+
+            if (response.ok) {
+                const data = await response.json();
+
+                if (data.success) {
+                    if (prompt.includes("<image>detect")) {
+                        const { boundingBox } = data;
+                        drawBoundingBox(boundingBox, ctx)
+
+                        responseTextDiv.style.display = 'block';
+                        responseTextDiv.innerHTML = "Response: " + escapeHtml(data.message);
+                    }
+                    else {
+                        processedCanvas.width = 0;
+                        processedCanvas.height = 0;
+                        ctx.clearRect(0, 0, processedCanvas.width, processedCanvas.height);
+                        responseTextDiv.style.display = 'block';
+                        responseTextDiv.innerHTML = data.message;
+                    }
+                }
+                else {
+                    processedCanvas.width = 0;
+                    processedCanvas.height = 0;
+                    ctx.clearRect(0, 0, processedCanvas.width, processedCanvas.height);
+                    responseTextDiv.style.display = 'block';
+                    responseTextDiv.innerHTML = "Response: " + data.message;
+                }
+            }
+            else {
+                alert('Error processing image.');
+            }
+
+        } catch (error) {
+            console.error('Error:', error);
+            alert('Error processing image.');
+        } finally {
+
+        }
+    });
+
+
+    // Function to draw the bounding box on canvas
+    function drawBoundingBox(boundingBox, ctx) {
+
+        const { x1, y1, x2, y2, label } = boundingBox;
+
+        // Generate random color for the bounding box and label background
+        const randomColor = getRandomColor();
+
+        // Set styles for the bounding box (random color stroke)
+        ctx.strokeStyle = randomColor;
+        ctx.lineWidth = 5;
+        ctx.strokeRect(x1, y1, x2 - x1, y2 - y1);
+
+        // Adjust label background height to fit the text properly
+        const labelPadding = 10;
+        const textWidth = ctx.measureText(label.charAt(0).toUpperCase() + label.slice(1)).width;
+        const labelWidth = textWidth * 3;
+        const labelHeight = 30;
+        const labelY = y1 - labelHeight;
+
+        // Draw background for the label (same random color as bounding box)
+        ctx.fillStyle = randomColor;
+        ctx.fillRect(x1, labelY, labelWidth, labelHeight);
+
+        // Set the text color to white
+        ctx.fillStyle = "white";
+        ctx.font = "bold 20px Arial";
+        ctx.fillText(label.charAt(0).toUpperCase() + label.slice(1), x1 + labelPadding, labelY + labelHeight - labelPadding);
+    }
+
+    // Function to generate a random RGB color
+    function getRandomColor() {
+        const r = Math.floor(Math.random() * 256);
+        const g = Math.floor(Math.random() * 256);
+        const b = Math.floor(Math.random() * 256);
+        return `rgb(${r},${g},${b})`;
+    }
+
+    function escapeHtml(unsafe) {
+        return unsafe
+            .replace(/&/g, "&amp;")
+            .replace(/</g, "&lt;")
+            .replace(/>/g, "&gt;")
+            .replace(/"/g, "&quot;")
+            .replace(/'/g, "&#039;");
+    }
+    
+});