Skip to content

Commit 8e25ef7

Browse files
authored
Merge branch 'main' into main
2 parents 53ad12e + 6e44b97 commit 8e25ef7

File tree

11 files changed

+679
-113
lines changed

11 files changed

+679
-113
lines changed

Demos/PaliGemma2-on-Web/README.md

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
### Developed by [Nitin Tiwari](https://linkedin.com/in/tiwari-nitin).
2+
3+
# Inference PaliGemma 2 on the browser with ONNX & Transformers.js
4+
This project runs inference with the paligemma2-3b-mix-224 model in the browser, using its converted ONNX weights and Hugging Face Transformers.js.
5+
6+
## PaliGemma 2 to ONNX Conversion:
7+
![Logo](assets/paligemma2-onnx-pipeline.png)
8+
9+
10+
## Steps to run:
11+
12+
1. Clone the repository on your local machine.
13+
2. Navigate to the `gemma-cookbook/Demos/PaliGemma2-on-Web` directory.
14+
3. Run `npm install` to install the Node.js packages.
15+
4. Run `node server.js` to start the server.
16+
5. Open `localhost:3000` in your web browser and start running inference with PaliGemma 2.
17+
18+
> [!NOTE]
19+
> On the first run, loading the model weights takes around 10-15 minutes.
20+
21+
## Results:
22+
![Logo](assets/paligemma2-onnx-output.gif)
23+
24+
25+
## Resources & References
26+
27+
1. [Google DeepMind PaliGemma 2](https://developers.googleblog.com/en/introducing-paligemma-2-mix/)
28+
2. Colab Notebooks:
29+
<table>
30+
<tr>
31+
<td><b>Convert and quantize PaliGemma 2 to ONNX</b></td>
32+
<td><a target="_blank" href="https://colab.research.google.com/github/NSTiwari/PaliGemma2-ONNX-Transformers.js/blob/main/Convert_PaliGemma2_to_ONNX.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a></td>
33+
</tr>
34+
<tr>
35+
<td><b>Inference PaliGemma 2 with Transformers.js</b></td>
36+
<td><a target="_blank" href="https://colab.research.google.com/github/NSTiwari/PaliGemma2-ONNX-Transformers.js/blob/main/Inference_PaliGemma2_with_Transformers_js.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a></td>
37+
</tr>
38+
</table>
39+
40+
3. [**Medium Blog**](https://medium.com/@tiwarinitin1999/inference-paligemma-2-with-transformers-js-5545986ac14a) for step-by-step implementation.
41+
4. [ONNX Community](https://huggingface.co/onnx-community)
12 MB
Loading
67 KB
Loading
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
{
2+
"name": "paligemma2-onnx-transformers.js",
3+
"version": "1.0.0",
4+
"main": "server.js",
5+
"type": "module",
6+
"scripts": {
7+
"test": "echo \"Error: no test specified\" && exit 1",
8+
"start": "node server.js"
9+
},
10+
"keywords": [],
11+
"author": "Nitin Tiwari",
12+
"license": "MIT",
13+
"description": "Inference PaliGemma 2 on the browser using ONNX weights, and Transformers.js.",
14+
"repository": {
15+
"type": "git",
16+
"url": "git+https://github.com/NSTiwari/PaliGemma2-ONNX-Transformers.js.git"
17+
},
18+
"bugs": {
19+
"url": "https://github.com/NSTiwari/PaliGemma2-ONNX-Transformers.js/issues"
20+
},
21+
"homepage": "https://github.com/NSTiwari/PaliGemma2-ONNX-Transformers.js#readme",
22+
"dependencies": {
23+
"@huggingface/transformers": "^3.3.3",
24+
"canvas": "^3.1.0",
25+
"express": "^4.21.2",
26+
"server.js": "^1.0.0"
27+
}
28+
}
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
<!DOCTYPE html>
2+
<html lang="en">
3+
4+
<head>
5+
<meta charset="UTF-8">
6+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
7+
<title>Inference PaliGemma 2 with 🤗 Transformers.js</title>
8+
<link rel="stylesheet" href="style.css">
9+
</head>
10+
11+
<body>
12+
<div class="container">
13+
<header>
14+
<h1>Inference PaliGemma 2 with 🤗 Transformers.js</h1>
15+
</header>
16+
17+
<section class="image-section">
18+
<div class="image-container">
19+
<img id="originalImage" src="" alt="Original Image" style="display: none;">
20+
<canvas id="processedCanvas" width="0" height="0"></canvas>
21+
</div>
22+
</section>
23+
24+
<section class="input-section">
25+
<div class="file-upload">
26+
<label for="imageUpload">Choose File</label>
27+
<input type="file" id="imageUpload" accept="image/*" style="display:none;" />
28+
</div>
29+
<input type="text" id="promptInput" placeholder="Enter your prompt (eg: detect car)" />
30+
<button id="processButton" disabled>Analyze Image</button>
31+
</section>
32+
33+
<section class="status-section">
34+
<p id="responseText" style="display: none;"></p>
35+
</section>
36+
</div>
37+
38+
<script src="script.js"></script>
39+
</body>
40+
41+
</html>
Lines changed: 186 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,186 @@
1+
document.addEventListener('DOMContentLoaded', () => {
2+
// Shorthand for element lookup by id.
const byId = (id) => document.getElementById(id);

// Page elements used by the handlers below.
const imageUpload = byId('imageUpload');
const processButton = byId('processButton');
const originalImage = byId('originalImage');
const processedCanvas = byId('processedCanvas');
const promptInput = byId('promptInput');
const responseTextDiv = byId('responseText');
const ctx = processedCanvas.getContext('2d');

// State filled in by the upload handler once a file has been read.
let imageBase64 = '';
let originalImageURL = '';
let originalImageObj;

// The preview image stays hidden until an upload has been decoded.
originalImage.style.display = 'none';
16+
17+
// When a file is chosen: read it as a data URL, remember its base64 payload,
// draw a scaled copy on the canvas, and reset the prompt/response UI.
imageUpload.addEventListener('change', (event) => {
    const selectedFile = event.target.files[0];
    if (!selectedFile) {
        return;
    }

    const reader = new FileReader();
    reader.onload = (loadEvent) => {
        const dataUrl = loadEvent.target.result;

        // The part after the first comma of the data URL is the base64 payload.
        imageBase64 = dataUrl.split(',')[1];
        originalImageURL = dataUrl;
        originalImage.src = originalImageURL;

        originalImageObj = new Image();
        originalImageObj.onload = () => {
            // Bounds of the display area.
            const maxWidth = 600;
            const maxHeight = 400;

            // Fit to the maximum width first; if that makes the image too
            // tall, fit to the maximum height instead (aspect ratio is kept).
            const aspectRatio = originalImageObj.width / originalImageObj.height;
            const heightAtMaxWidth = maxWidth / aspectRatio;
            const isTooTall = heightAtMaxWidth > maxHeight;
            const displayWidth = isTooTall ? maxHeight * aspectRatio : maxWidth;
            const displayHeight = isTooTall ? maxHeight : heightAtMaxWidth;

            processedCanvas.width = displayWidth;
            processedCanvas.height = displayHeight;
            ctx.clearRect(0, 0, processedCanvas.width, processedCanvas.height);
            ctx.drawImage(originalImageObj, 0, 0, displayWidth, displayHeight);

            // Reveal the preview only once the image has loaded.
            originalImage.style.display = 'block';
        };
        originalImageObj.src = originalImageURL;

        // A new image is available: allow analysis and clear the old result.
        processButton.disabled = false;
        responseTextDiv.style.display = 'none';
        responseTextDiv.innerHTML = '';
        promptInput.value = '';
    };
    reader.readAsDataURL(selectedFile);
});
67+
68+
// Handle process button click: POST the uploaded image and prompt to
// /process-image, then show the answer (and, for "detect" prompts, the
// bounding box returned by the server).
processButton.addEventListener('click', async () => {
    if (!imageBase64) {
        alert("Please upload an image first.");
        return;
    }

    // Writes a message into the response area. The text comes from the
    // server, so it is escaped before being assigned to innerHTML.
    const showMessage = (text) => {
        responseTextDiv.style.display = 'block';
        responseTextDiv.innerHTML = escapeHtml(String(text));
    };

    // Collapses the canvas to 0x0 and clears it.
    // NOTE(review): after this runs, processedCanvas.width/height stay 0
    // until a new image is uploaded, so a follow-up "detect" request on the
    // same image sends targetWidth/targetHeight of 0 - confirm whether the
    // server and drawBoundingBox cope with that.
    const collapseCanvas = () => {
        processedCanvas.width = 0;
        processedCanvas.height = 0;
        ctx.clearRect(0, 0, processedCanvas.width, processedCanvas.height);
    };

    // Removes the "Analyzing..." placeholder before reporting a failure so
    // the page does not look like it is still working.
    const reportFailure = () => {
        responseTextDiv.style.display = 'none';
        responseTextDiv.innerHTML = '';
        alert('Error processing image.');
    };

    // Clear off previous results.
    ctx.clearRect(0, 0, processedCanvas.width, processedCanvas.height);
    responseTextDiv.innerHTML = 'Analyzing...';
    responseTextDiv.style.display = 'block';

    // Build the model prompt: "<image>detect <label>" when the user asked
    // for detection, otherwise "<image>" followed by the text as typed.
    let prompt = promptInput.value || "";
    if (prompt.toLowerCase().includes("detect")) {
        const labelMatch = prompt.match(/detect\s+(.*)/i);
        const label = labelMatch ? labelMatch[1] : 'Unknown';
        prompt = `<image>detect ${label}`;
    } else {
        prompt = `<image>${prompt}`;
    }

    // Block repeat submissions while a request is in flight.
    processButton.disabled = true;
    try {
        const response = await fetch('/process-image', {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
            },
            body: JSON.stringify({
                image: imageBase64,
                prompt: prompt,
                targetWidth: processedCanvas.width,
                targetHeight: processedCanvas.height,
                originalWidth: originalImageObj.width,
                originalHeight: originalImageObj.height,
            }),
        });

        if (!response.ok) {
            reportFailure();
            return;
        }

        const data = await response.json();

        if (data.success && prompt.includes("<image>detect")) {
            const { boundingBox } = data;
            drawBoundingBox(boundingBox, ctx);
            showMessage("Response: " + data.message);
        } else if (data.success) {
            collapseCanvas();
            showMessage(data.message);
        } else {
            collapseCanvas();
            showMessage("Response: " + data.message);
        }
    } catch (error) {
        console.error('Error:', error);
        reportFailure();
    } finally {
        processButton.disabled = false;
    }
});
137+
138+
139+
/**
 * Draws a bounding box and its capitalized label on a 2D canvas context.
 *
 * The box outline and the label background share one color obtained from
 * getRandomColor(); the label text is white.
 *
 * @param {{x1: number, y1: number, x2: number, y2: number, label: string}} boundingBox
 *   Corner coordinates of the box and the label to print. A missing label
 *   is treated as an empty string.
 * @param {CanvasRenderingContext2D} ctx - Context to draw on.
 */
function drawBoundingBox(boundingBox, ctx) {
    const { x1, y1, x2, y2, label } = boundingBox;

    const labelText = label == null ? '' : String(label);
    const caption = labelText.charAt(0).toUpperCase() + labelText.slice(1);

    // One color for both the outline and the label background.
    const boxColor = getRandomColor();

    ctx.strokeStyle = boxColor;
    ctx.lineWidth = 5;
    ctx.strokeRect(x1, y1, x2 - x1, y2 - y1);

    // The font must be set before measuring: measureText() uses the font
    // currently active on the context, so measuring first would size the
    // background for a different font than the one the text is drawn in.
    ctx.font = "bold 20px Arial";
    const labelPadding = 10;
    const labelHeight = 30;
    const labelWidth = ctx.measureText(caption).width + labelPadding * 2;

    // Place the label above the box, but never above the top canvas edge.
    const labelY = Math.max(0, y1 - labelHeight);

    ctx.fillStyle = boxColor;
    ctx.fillRect(x1, labelY, labelWidth, labelHeight);

    ctx.fillStyle = "white";
    ctx.fillText(caption, x1 + labelPadding, labelY + labelHeight - labelPadding);
}
168+
169+
/**
 * Generates a random RGB color string.
 *
 * @param {() => number} [random=Math.random] - Source of numbers in [0, 1).
 *   Called three times, once per channel in red, green, blue order. Pass a
 *   deterministic function to get reproducible colors.
 * @returns {string} A CSS color of the form `rgb(r,g,b)` with each channel
 *   an integer from 0 to 255.
 */
function getRandomColor(random = Math.random) {
    const channel = () => Math.floor(random() * 256);
    const r = channel();
    const g = channel();
    const b = channel();
    return `rgb(${r},${g},${b})`;
}
176+
177+
/**
 * Escapes the five HTML-significant characters (& < > " ') so the result
 * can be inserted into markup as plain text.
 *
 * @param {*} unsafe - Value to escape. `null`/`undefined` become an empty
 *   string; any other non-string value is converted with String() first.
 * @returns {string} The escaped text.
 */
function escapeHtml(unsafe) {
    const replacements = {
        '&': '&amp;',
        '<': '&lt;',
        '>': '&gt;',
        '"': '&quot;',
        "'": '&#039;',
    };
    const text = unsafe == null ? '' : String(unsafe);
    // Single pass: every special character is replaced exactly once, so the
    // ampersands introduced by the entities are never escaped again.
    return text.replace(/[&<>"']/g, (ch) => replacements[ch]);
}
185+
186+
});

0 commit comments

Comments
 (0)