4 changes: 4 additions & 0 deletions samples/js/text_generation/benchmark_genai.js
@@ -90,6 +90,10 @@ async function main() {
pipe = await LLMPipeline(modelsPath, device, { schedulerConfig: schedulerConfig });
}

const inputData = await pipe.getTokenizer().encode(prompt);
const promptTokenSize = inputData.input_ids.shape[1];
console.log(`Prompt token size: ${promptTokenSize}`);

for (let i = 0; i < numWarmup; i++) {
await pipe.generate(prompt, config);
}
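
For context, a minimal sketch (assuming the same pipe object as above) of what the new encode binding returns — both fields are openvino-node Tensors of shape [batch_size, sequence_length]:

const { input_ids, attention_mask } = await pipe.getTokenizer().encode(prompt);
console.log(input_ids.shape);      // e.g. [1, 12]
console.log(attention_mask.shape); // matches input_ids.shape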
1 change: 1 addition & 0 deletions src/js/include/addon.hpp
@@ -12,6 +12,7 @@ struct AddonData {
Napi::FunctionReference tokenizer;
Napi::FunctionReference perf_metrics;
Napi::FunctionReference chat_history;
Napi::ObjectReference openvino_addon;
};

void init_class(Napi::Env env,
14 changes: 14 additions & 0 deletions src/js/include/helper.hpp
@@ -37,9 +37,13 @@ ov::AnyMap js_to_cpp<ov::AnyMap>(const Napi::Env& env, const Napi::Value& value)
/** @brief A template specialization for TargetType std::string */
template <>
std::string js_to_cpp<std::string>(const Napi::Env& env, const Napi::Value& value);
template <>
int64_t js_to_cpp<int64_t>(const Napi::Env& env, const Napi::Value& value);
/** @brief A template specialization for TargetType std::vector<std::string> */
template <>
std::vector<std::string> js_to_cpp<std::vector<std::string>>(const Napi::Env& env, const Napi::Value& value);
template <>
std::vector<int64_t> js_to_cpp<std::vector<int64_t>>(const Napi::Env& env, const Napi::Value& value);
/** @brief A template specialization for TargetType GenerateInputs */
template <>
GenerateInputs js_to_cpp<GenerateInputs>(const Napi::Env& env, const Napi::Value& value);
@@ -58,6 +62,8 @@ ov::genai::StructuredOutputConfig::Tag js_to_cpp<ov::genai::StructuredOutputConf
/** @brief A template specialization for TargetType ov::genai::StructuredOutputConfig::StructuralTag */
template <>
ov::genai::StructuredOutputConfig::StructuralTag js_to_cpp<ov::genai::StructuredOutputConfig::StructuralTag>(const Napi::Env& env, const Napi::Value& value);
template <>
ov::Tensor js_to_cpp<ov::Tensor>(const Napi::Env& env, const Napi::Value& value);
/**
* @brief Unwraps a C++ object from a JavaScript wrapper.
* @tparam TargetType The C++ class type to extract.
@@ -110,6 +116,12 @@ Napi::Value cpp_to_js<std::vector<size_t>, Napi::Value>(const Napi::Env& env, co

template <>
Napi::Value cpp_to_js<ov::genai::JsonContainer, Napi::Value>(const Napi::Env& env, const ov::genai::JsonContainer& json_container);

template <>
Napi::Value cpp_to_js<ov::Tensor, Napi::Value>(const Napi::Env& env, const ov::Tensor& tensor);

template <>
Napi::Value cpp_to_js<ov::genai::TokenizedInputs, Napi::Value>(const Napi::Env& env, const ov::genai::TokenizedInputs& tokenized_inputs);
/**
* @brief Template function to convert C++ map into Javascript Object. Map key must be std::string.
* @tparam MapElementType C++ data type of map elements.
@@ -130,3 +142,5 @@ bool is_chat_history(const Napi::Env& env, const Napi::Value& value);
std::string json_stringify(const Napi::Env& env, const Napi::Value& value);

Napi::Value json_parse(const Napi::Env& env, const std::string& value);

Napi::Function get_prototype_from_ov_addon(const Napi::Env& env, const std::string& ctor_name);
6 changes: 6 additions & 0 deletions src/js/include/tokenizer.hpp
@@ -15,6 +15,12 @@ class TokenizerWrapper : public Napi::ObjectWrap<TokenizerWrapper> {
Napi::Value get_eos_token_id(const Napi::CallbackInfo& info);
Napi::Value get_pad_token(const Napi::CallbackInfo& info);
Napi::Value get_pad_token_id(const Napi::CallbackInfo& info);
Napi::Value get_chat_template(const Napi::CallbackInfo& info);
Napi::Value get_original_chat_template(const Napi::CallbackInfo& info);
Napi::Value set_chat_template(const Napi::CallbackInfo& info);
Napi::Value supports_paired_input(const Napi::CallbackInfo& info);
Napi::Value encode(const Napi::CallbackInfo& info);
Napi::Value decode(const Napi::CallbackInfo& info);
private:
ov::genai::Tokenizer _tokenizer;
};
6 changes: 4 additions & 2 deletions src/js/lib/addon.ts
@@ -2,6 +2,7 @@ import { createRequire } from "module";
import { platform } from "node:os";
import { join, dirname, resolve } from "node:path";
import type { ChatHistory as IChatHistory } from "./chatHistory.js";
import { addon as ovAddon } from "openvino-node";

export type EmbeddingResult = Float32Array | Int8Array | Uint8Array;
export type EmbeddingResults = Float32Array[] | Int8Array[] | Uint8Array[];
@@ -60,6 +61,7 @@ interface OpenVINOGenAIAddon {
TextEmbeddingPipeline: TextEmbeddingPipelineWrapper;
LLMPipeline: any;
ChatHistory: IChatHistory;
setOpenvinoAddon: (ovAddon: any) => void;
}

// We need to use delayed import to get an updated Path if required
@@ -78,7 +80,7 @@ function getGenAIAddon(): OpenVINOGenAIAddon {
}

const addon = getGenAIAddon();
addon.setOpenvinoAddon(ovAddon);

export const { ChatHistory } = addon;
export const { TextEmbeddingPipeline, LLMPipeline, ChatHistory } = addon;
export type ChatHistory = IChatHistory;
export default addon;
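
The wiring above matters because the GenAI native addon needs the openvino-node addon's constructors (held in the new openvino_addon reference) to hand back real openvino-node objects. A minimal sketch of the observable effect — the package name and the instanceof check are illustrative assumptions, not part of this change:

import { addon as ov } from "openvino-node";
import { LLMPipeline } from "openvino-genai-node"; // package name assumed

const pipe = await LLMPipeline("./model_dir", "CPU");
const inputs = await pipe.getTokenizer().encode("hello");
// Tensors produced by the GenAI addon share openvino-node's Tensor prototype,
// so they interoperate with openvino-node APIs directly:
console.log(inputs.input_ids instanceof ov.Tensor); // expected: true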
1 change: 1 addition & 0 deletions src/js/lib/index.ts
@@ -40,3 +40,4 @@ export const { LLMPipeline, TextEmbeddingPipeline } = PipelineFactory;
export { DecodedResults } from "./pipelines/llmPipeline.js";
export * from "./utils.js";
export * from "./addon.js";
export * from "./tokenizer.js";
22 changes: 3 additions & 19 deletions src/js/lib/pipelines/llmPipeline.ts
@@ -1,30 +1,14 @@
import util from "node:util";
import addon, { ChatHistory } from "../addon.js";
import { ChatHistory, LLMPipeline as LLMPipelineWrap } from "../addon.js";
import { GenerationConfig, StreamingStatus, LLMPipelineProperties } from "../utils.js";
import { Tokenizer } from "../tokenizer.js";

export type ResolveFunction = (arg: { value: string; done: boolean }) => void;
export type Options = {
disableStreamer?: boolean;
max_new_tokens?: number;
};

interface Tokenizer {
/** Applies a chat template to format chat history into a prompt string. */
applyChatTemplate(
chatHistory: Record<string, any>[] | ChatHistory,
addGenerationPrompt: boolean,
chatTemplate?: string,
tools?: Record<string, any>[],
extraContext?: Record<string, any>,
): string;
getBosToken(): string;
getBosTokenId(): number;
getEosToken(): string;
getEosTokenId(): number;
getPadToken(): string;
getPadTokenId(): number;
}

/** Structure with raw performance metrics for each generation before any statistics are calculated. */
export type RawMetrics = {
/** Durations for each generate call in milliseconds. */
@@ -167,7 +151,7 @@ export class LLMPipeline {
async init() {
if (this.isInitialized) throw new Error("LLMPipeline is already initialized");

this.pipeline = new addon.LLMPipeline();
this.pipeline = new LLMPipelineWrap();

const initPromise = util.promisify(this.pipeline.init.bind(this.pipeline));
const result = await initPromise(this.modelPath, this.device, this.properties);
5 changes: 3 additions & 2 deletions src/js/lib/pipelines/textEmbeddingPipeline.ts
@@ -1,9 +1,10 @@
import util from "node:util";
import addon, {
import {
TextEmbeddingPipelineWrapper,
EmbeddingResult,
EmbeddingResults,
TextEmbeddingConfig,
TextEmbeddingPipeline as TextEmbeddingPipelineWrap,
} from "../addon.js";

export class TextEmbeddingPipeline {
@@ -29,7 +30,7 @@ export class TextEmbeddingPipeline {
async init() {
if (this.pipeline) throw new Error("TextEmbeddingPipeline is already initialized");

this.pipeline = new addon.TextEmbeddingPipeline();
this.pipeline = new TextEmbeddingPipelineWrap();

const initPromise = util.promisify(this.pipeline.init.bind(this.pipeline));
await initPromise(this.modelPath, this.device, this.config, this.ovProperties);
186 changes: 186 additions & 0 deletions src/js/lib/tokenizer.ts
@@ -0,0 +1,186 @@
// Copyright (C) 2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

import { Tensor } from "openvino-node";
import { ChatHistory } from "./addon.js";

/**
* TokenizedInputs contains input_ids and attention_mask tensors.
* This is the result of encoding prompts using the Tokenizer.
*/
export interface TokenizedInputs {
/** Tensor containing token IDs for the encoded input */
input_ids: Tensor;
/** Tensor containing attention mask (1 for real tokens, 0 for padding) */
attention_mask: Tensor;
}

/**
* Options for encode method.
*/
export interface EncodeOptions {
/**
* Whether to add special tokens like BOS, EOS, PAD.
* @defaultValue true
*/
addSpecialTokens?: boolean;

/**
* Whether to pad the sequence to the maximum length.
* @defaultValue false
*/
padToMaxLength?: boolean;

/**
* Maximum length of the sequence.
* If undefined, the value will be taken from the IR.
*/
maxLength?: number;

/**
* Side to pad the sequence, can be 'left' or 'right'.
* If undefined, the value will be taken from the IR.
*/
paddingSide?: "left" | "right";
}

/**
* The Tokenizer class is used to encode prompts and decode resulting tokens.
*
* Chat template is initialized from sources in the following order, overriding the previous value:
* 1. chat_template entry from tokenizer_config.json
* 2. chat_template entry from processor_config.json
* 3. chat_template entry from chat_template.json
* 4. chat_template entry from rt_info section of openvino.Model
 * 5. If the template is known not to be supported by GenAI, it's replaced with a simplified supported version.
*/
export interface Tokenizer {
/**
* Applies a chat template to format chat history into a prompt string.
* @param chatHistory - chat history as an array of message objects or ChatHistory instance
* @param addGenerationPrompt - whether to add a generation prompt at the end
* @param chatTemplate - optional custom chat template to use instead of the default
* @param tools - optional array of tool definitions for function calling
* @param extraContext - optional extra context object for custom template variables
* @returns formatted prompt string
*/
applyChatTemplate(
chatHistory: Record<string, any>[] | ChatHistory,
addGenerationPrompt: boolean,
chatTemplate?: string,
tools?: Record<string, any>[],
extraContext?: Record<string, any>,
): string;
Comment on lines 108 to 114 — Copilot AI, Nov 20, 2025:

Replace generic 'any' types with more specific types. Consider using 'ChatMessage[]' for the chatHistory parameter, 'ToolDefinition[]' for tools, and 'ExtraContext' for extraContext, which are exported from './chatHistory.js'.

/**
* Encodes a single prompt or a list of prompts into tokenized inputs.
* @param prompts - single prompt string or array of prompts
* @param options - encoding options
* @returns TokenizedInputs object containing input_ids and attention_mask tensors.
*/
encode(prompts: string | string[], options?: EncodeOptions): TokenizedInputs;

/**
* Encodes two lists of prompts into tokenized inputs (for paired input).
 * The two lists must contain the same number of strings, or one of them may contain a single string.
* In the latter case, the single-string input will be broadcast into the shape of the other input,
* which is more efficient than repeating the string in pairs.
* @param prompts1 - first list of prompts to encode
* @param prompts2 - second list of prompts to encode
* @param options - encoding options
* @returns TokenizedInputs object containing input_ids and attention_mask tensors.
*/
encode(prompts1: string[], prompts2: string[], options?: EncodeOptions): TokenizedInputs;

/**
* Encodes a list of paired prompts into tokenized inputs.
 * Input format is the same as for HF paired input: [[prompt_1, prompt_2], ...].
* @param prompts - list of paired prompts to encode
* @param options - encoding options
* @returns TokenizedInputs object containing input_ids and attention_mask tensors.
*/
encode(prompts: [string, string][], options?: EncodeOptions): TokenizedInputs;
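
  // Illustrative only (not in the source) — the overloads above admit calls like:
  //   tokenizer.encode("What is OpenVINO?");                        // single prompt
  //   tokenizer.encode(["q1", "q2"], { addSpecialTokens: false });  // batch + options
  //   tokenizer.encode(["query"], ["doc A", "doc B"]);              // broadcast pairing
  //   tokenizer.encode([["q1", "doc A"], ["q2", "doc B"]]);         // HF-style pairs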

// TODO: move decode options to another interface
/**
* Decode a sequence of token IDs into a string prompt.
* @param tokens - sequence of token IDs to decode
* @param skipSpecialTokens - whether to skip special tokens. Default is true.
* @returns decoded string.
*/
decode(tokens: number[], skipSpecialTokens?: boolean): string;

/**
* Decode a batch of token sequences (as Tensor or array of arrays) into a list of string prompts.
* @param tokens - tensor containing token IDs or batch of token ID sequences
* @param skipSpecialTokens - whether to skip special tokens. Default is true.
* @returns list of decoded strings.
*/
decode(tokens: Tensor | number[][], skipSpecialTokens?: boolean): string[];

/**
* Returns the BOS (Beginning of Sequence) token string.
* @returns BOS token string
*/
getBosToken(): string;

/**
* Returns the BOS (Beginning of Sequence) token ID.
* @returns BOS token ID
*/
getBosTokenId(): number;

/**
* Returns the EOS (End of Sequence) token string.
* @returns EOS token string
*/
getEosToken(): string;

/**
* Returns the EOS (End of Sequence) token ID.
* @returns EOS token ID
*/
getEosTokenId(): number;

/**
* Returns the PAD (Padding) token string.
* @returns PAD token string
*/
getPadToken(): string;

/**
* Returns the PAD (Padding) token ID.
* @returns PAD token ID
*/
getPadTokenId(): number;

/**
* Returns the current chat template string.
* @returns current chat template string
*/
getChatTemplate(): string;

/**
* Returns the original chat template from the tokenizer configuration.
* @returns original chat template string
*/
getOriginalChatTemplate(): string;

/**
* Override a chat template read from tokenizer_config.json.
* @param chatTemplate - custom chat template string to use
*/
setChatTemplate(chatTemplate: string): void;

/**
* Returns true if the tokenizer supports paired input, false otherwise.
* @returns whether the tokenizer supports paired input
*/
supportsPairedInput(): boolean;

/**
* The current chat template string.
* Can be used to get or set the chat template.
*/
chatTemplate: string;
}
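
Taken together, a minimal end-to-end sketch of the interface above (illustrative: the package name and model path are assumptions, and error handling is omitted):

import { LLMPipeline } from "openvino-genai-node"; // package name assumed

const pipe = await LLMPipeline("./TinyLlama-1.1B-ov", "CPU");
const tokenizer = await pipe.getTokenizer();

// Encode, then round-trip back to text.
const inputs = tokenizer.encode("Hello, OpenVINO!");
const ids = Array.from(inputs.input_ids.data, Number); // i64 tensor data -> number[]
console.log(tokenizer.decode(ids)); // "Hello, OpenVINO!" (special tokens skipped by default)

// Chat-template helpers added in this PR.
const prompt = tokenizer.applyChatTemplate(
  [{ role: "user", content: "Hi!" }],
  true, // addGenerationPrompt
);
console.log(tokenizer.supportsPairedInput(), prompt.length);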
25 changes: 25 additions & 0 deletions src/js/src/addon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,28 @@ void init_class(Napi::Env env,
exports.Set(class_name, prototype);
}

Napi::Value init_ov_addon(const Napi::CallbackInfo& info) {
Contributor — Suggested change:
Napi::Value init_ov_addon(const Napi::CallbackInfo& info) {
void init_ov_addon(const Napi::CallbackInfo& info) {

Contributor Author: This function is used as a JavaScript function, so we usually return a Napi::Value even if nothing is returned. I understand the confusion because it is called like init_class, but they have different usages. To be clearer, I will rename this function to set_ov_node.

Contributor: I believe there are functions that return Napi::Value even though they are void, but I do not think we should repeat that pattern. Here pop_back is used as a JS function and it's void:
void ChatHistoryWrap::pop_back(const Napi::CallbackInfo& info) {

Contributor Author: Agreed; it has been updated. Let's use this approach in the next changes.

Napi::Env env = info.Env();
if (info.Length() < 1) {
Napi::TypeError::New(env, "setOpenvinoAddon expects one argument").ThrowAsJavaScriptException();
return env.Undefined();
}
if (info[0].IsUndefined() || info[0].IsNull() || !info[0].IsObject()) {
Napi::TypeError::New(env, "Passed addon must be an object").ThrowAsJavaScriptException();
return env.Undefined();
}

auto addon_data = env.GetInstanceData<AddonData>();
if (!addon_data) {
Napi::TypeError::New(env, "Addon data is not initialized").ThrowAsJavaScriptException();
return env.Undefined();
}

auto ov_addon = info[0].As<Napi::Object>();
addon_data->openvino_addon = Napi::Persistent(ov_addon);
return env.Undefined();
}

// Define the addon initialization function
Napi::Object init_module(Napi::Env env, Napi::Object exports) {
auto addon_data = new AddonData();
@@ -31,6 +53,9 @@ Napi::Object init_module(Napi::Env env, Napi::Object exports) {
init_class(env, exports, "PerfMetrics", &PerfMetricsWrapper::get_class, addon_data->perf_metrics);
init_class(env, exports, "ChatHistory", &ChatHistoryWrap::get_class, addon_data->chat_history);

// Expose a helper to set the openvino-node addon from JS (useful for ESM)
exports.Set("setOpenvinoAddon", Napi::Function::New(env, init_ov_addon));

return exports;
}
