Commit a3baec0: Merge branch 'main' into ch-hybrid-node-fix
2 parents: 0d85038 + 8209266

26 files changed (+654, -88 lines)

.changeset/angry-apples-think.md (5 additions, 0 deletions)

@@ -0,0 +1,5 @@
+---
+'@firebase/auth': patch
+---
+
+Expose `browserCookiePersistence` beta feature in public typings.
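
For context, a minimal sketch of how this persistence option is typically wired up once it is exposed in the typings. This is not part of the diff; the config object is a placeholder, assuming the standard `initializeAuth` flow:

```typescript
import { initializeApp } from 'firebase/app';
import { initializeAuth, browserCookiePersistence } from 'firebase/auth';

// Placeholder config; supply your project's values.
const app = initializeApp({ apiKey: '...', authDomain: '...' });

// Cookie persistence keeps the auth state in cookies, which server-side
// code (e.g. SSR middleware) can read, unlike localStorage persistence.
const auth = initializeAuth(app, { persistence: browserCookiePersistence });
```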

.changeset/dull-ligers-bow.md (7 additions, 0 deletions)

@@ -0,0 +1,7 @@
+---
+'firebase': minor
+'@firebase/ai': minor
+---
+
+Deprecate `sendMediaChunks()` and `sendMediaStream()`. Instead, use the new methods added to the `LiveSession` class.
+Add `sendTextRealtime()`, `sendAudioRealtime()`, and `sendVideoRealtime()` to the `LiveSession` class.
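
A minimal migration sketch, assuming an open `LiveSession` named `liveSession` and base64-encoded media in the illustrative variables `pcmBase64` and `frameBase64`:

```typescript
// Before (deprecated): one method covers every realtime modality.
await liveSession.sendMediaChunks([
  { mimeType: 'audio/pcm', data: pcmBase64 },
  { mimeType: 'image/jpeg', data: frameBase64 }
]);

// After: a dedicated method per modality.
await liveSession.sendTextRealtime('Describe what you hear and see.');
await liveSession.sendAudioRealtime({ mimeType: 'audio/pcm', data: pcmBase64 });
await liveSession.sendVideoRealtime({ mimeType: 'image/jpeg', data: frameBase64 });
```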

.changeset/rare-hats-know.md (5 additions, 0 deletions)

@@ -0,0 +1,5 @@
+---
+'@firebase/ai': patch
+---
+
+Fix logic for merging default `onDeviceParams` with user-provided `onDeviceParams`.

.changeset/smooth-parrots-speak.md (6 additions, 0 deletions)

@@ -0,0 +1,6 @@
+---
+'@firebase/ai': minor
+'firebase': minor
+---
+
+Add `inferenceSource` to the response from `generateContent` and `generateContentStream`. This property indicates whether on-device or in-cloud inference was used to generate the result.
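
A minimal sketch of reading the new property. The `inferenceSource` field and the `InferenceSource` constants come straight from this diff; the app and model setup is illustrative:

```typescript
import { getAI, getGenerativeModel, InferenceSource } from 'firebase/ai';

// Assumes `app` is an initialized FirebaseApp; the model name is illustrative.
const model = getGenerativeModel(getAI(app), { model: 'gemini-2.5-flash' });

const result = await model.generateContent('Hello!');
// `inferenceSource` is an optional beta field: 'on_device' or 'in_cloud'.
if (result.response.inferenceSource === InferenceSource.ON_DEVICE) {
  console.log('This result was generated on-device.');
}
```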

common/api-review/ai.api.md (16 additions, 0 deletions)

@@ -256,6 +256,8 @@ export { Date_2 as Date }
 // @public
 export interface EnhancedGenerateContentResponse extends GenerateContentResponse {
     functionCalls: () => FunctionCall[] | undefined;
+    // @beta
+    inferenceSource?: InferenceSource;
     inlineDataParts: () => InlineDataPart[] | undefined;
     text: () => string;
     thoughtSummary: () => string | undefined;

@@ -816,6 +818,15 @@ export const InferenceMode: {
 // @beta
 export type InferenceMode = (typeof InferenceMode)[keyof typeof InferenceMode];

+// @beta
+export const InferenceSource: {
+    readonly ON_DEVICE: "on_device";
+    readonly IN_CLOUD: "in_cloud";
+};
+
+// @beta
+export type InferenceSource = (typeof InferenceSource)[keyof typeof InferenceSource];
+
 // @public
 export interface InlineDataPart {
     // (undocumented)

@@ -994,9 +1005,14 @@ export class LiveSession {
     isClosed: boolean;
     receive(): AsyncGenerator<LiveServerContent | LiveServerToolCall | LiveServerToolCallCancellation>;
     send(request: string | Array<string | Part>, turnComplete?: boolean): Promise<void>;
+    sendAudioRealtime(blob: GenerativeContentBlob): Promise<void>;
     sendFunctionResponses(functionResponses: FunctionResponse[]): Promise<void>;
+    // @deprecated
     sendMediaChunks(mediaChunks: GenerativeContentBlob[]): Promise<void>;
+    // @deprecated (undocumented)
     sendMediaStream(mediaChunkStream: ReadableStream<GenerativeContentBlob>): Promise<void>;
+    sendTextRealtime(text: string): Promise<void>;
+    sendVideoRealtime(blob: GenerativeContentBlob): Promise<void>;
 }

 // @public
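
As an aside, the report relies on TypeScript's const-object-plus-derived-union pattern, so `InferenceSource` is both a value and a type. A minimal standalone sketch of how that pattern behaves (not SDK code):

```typescript
// A frozen object of string constants...
const InferenceSource = {
  ON_DEVICE: 'on_device',
  IN_CLOUD: 'in_cloud'
} as const;

// ...plus a union type derived from its values: 'on_device' | 'in_cloud'.
type InferenceSource = (typeof InferenceSource)[keyof typeof InferenceSource];

// Callers can use the named constant or the raw string; both type-check.
function label(source: InferenceSource): string {
  return source === InferenceSource.ON_DEVICE ? 'on-device' : 'in-cloud';
}
```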

docs-devsite/ai.enhancedgeneratecontentresponse.md (14 additions, 0 deletions)

@@ -24,6 +24,7 @@ export interface EnhancedGenerateContentResponse extends GenerateContentResponse
 | Property | Type | Description |
 | --- | --- | --- |
 | [functionCalls](./ai.enhancedgeneratecontentresponse.md#enhancedgeneratecontentresponsefunctioncalls) | () =&gt; [FunctionCall](./ai.functioncall.md#functioncall_interface)<!-- -->\[\] \| undefined | Aggregates and returns every [FunctionCall](./ai.functioncall.md#functioncall_interface) from the first candidate of [GenerateContentResponse](./ai.generatecontentresponse.md#generatecontentresponse_interface)<!-- -->. |
+| [inferenceSource](./ai.enhancedgeneratecontentresponse.md#enhancedgeneratecontentresponseinferencesource) | [InferenceSource](./ai.md#inferencesource) | <b><i>(Public Preview)</i></b> Indicates whether inference happened on-device or in-cloud. |
 | [inlineDataParts](./ai.enhancedgeneratecontentresponse.md#enhancedgeneratecontentresponseinlinedataparts) | () =&gt; [InlineDataPart](./ai.inlinedatapart.md#inlinedatapart_interface)<!-- -->\[\] \| undefined | Aggregates and returns every [InlineDataPart](./ai.inlinedatapart.md#inlinedatapart_interface) from the first candidate of [GenerateContentResponse](./ai.generatecontentresponse.md#generatecontentresponse_interface)<!-- -->. |
 | [text](./ai.enhancedgeneratecontentresponse.md#enhancedgeneratecontentresponsetext) | () =&gt; string | Returns the text string from the response, if available. Throws if the prompt or candidate was blocked. |
 | [thoughtSummary](./ai.enhancedgeneratecontentresponse.md#enhancedgeneratecontentresponsethoughtsummary) | () =&gt; string \| undefined | Aggregates and returns every [TextPart](./ai.textpart.md#textpart_interface) with their <code>thought</code> property set to <code>true</code> from the first candidate of [GenerateContentResponse](./ai.generatecontentresponse.md#generatecontentresponse_interface)<!-- -->. |

@@ -38,6 +39,19 @@ Aggregates and returns every [FunctionCall](./ai.functioncall.md#functioncall_in
 functionCalls: () => FunctionCall[] | undefined;
 ```
 
+## EnhancedGenerateContentResponse.inferenceSource
+
+> This API is provided as a preview for developers and may change based on feedback that we receive. Do not use this API in a production environment.
+>
+
+Indicates whether inference happened on-device or in-cloud.
+
+<b>Signature:</b>
+
+```typescript
+inferenceSource?: InferenceSource;
+```
+
 ## EnhancedGenerateContentResponse.inlineDataParts
 
 Aggregates and returns every [InlineDataPart](./ai.inlinedatapart.md#inlinedatapart_interface) from the first candidate of [GenerateContentResponse](./ai.generatecontentresponse.md#generatecontentresponse_interface)<!-- -->.

docs-devsite/ai.livesession.md (128 additions, 2 deletions)

@@ -39,9 +39,12 @@ export declare class LiveSession
 | [close()](./ai.livesession.md#livesessionclose) | | <b><i>(Public Preview)</i></b> Closes this session. All methods on this session will throw an error once this resolves. |
 | [receive()](./ai.livesession.md#livesessionreceive) | | <b><i>(Public Preview)</i></b> Yields messages received from the server. This can only be used by one consumer at a time. |
 | [send(request, turnComplete)](./ai.livesession.md#livesessionsend) | | <b><i>(Public Preview)</i></b> Sends content to the server. |
+| [sendAudioRealtime(blob)](./ai.livesession.md#livesessionsendaudiorealtime) | | <b><i>(Public Preview)</i></b> Sends audio data to the server in realtime. |
 | [sendFunctionResponses(functionResponses)](./ai.livesession.md#livesessionsendfunctionresponses) | | <b><i>(Public Preview)</i></b> Sends function responses to the server. |
 | [sendMediaChunks(mediaChunks)](./ai.livesession.md#livesessionsendmediachunks) | | <b><i>(Public Preview)</i></b> Sends realtime input to the server. |
-| [sendMediaStream(mediaChunkStream)](./ai.livesession.md#livesessionsendmediastream) | | <b><i>(Public Preview)</i></b> Sends a stream of [GenerativeContentBlob](./ai.generativecontentblob.md#generativecontentblob_interface)<!-- -->. |
+| [sendMediaStream(mediaChunkStream)](./ai.livesession.md#livesessionsendmediastream) | | <b><i>(Public Preview)</i></b> |
+| [sendTextRealtime(text)](./ai.livesession.md#livesessionsendtextrealtime) | | <b><i>(Public Preview)</i></b> Sends text to the server in realtime. |
+| [sendVideoRealtime(blob)](./ai.livesession.md#livesessionsendvideorealtime) | | <b><i>(Public Preview)</i></b> Sends video data to the server in realtime. |
 
 ## LiveSession.inConversation

@@ -135,6 +138,45 @@ Promise&lt;void&gt;
 
 If this session has been closed.
 
+## LiveSession.sendAudioRealtime()
+
+> This API is provided as a preview for developers and may change based on feedback that we receive. Do not use this API in a production environment.
+>
+
+Sends audio data to the server in realtime.
+
+The server requires that the audio data is base64-encoded 16-bit PCM at 16kHz little-endian.
+
+<b>Signature:</b>
+
+```typescript
+sendAudioRealtime(blob: GenerativeContentBlob): Promise<void>;
+```
+
+#### Parameters
+
+| Parameter | Type | Description |
+| --- | --- | --- |
+| blob | [GenerativeContentBlob](./ai.generativecontentblob.md#generativecontentblob_interface) | The base64-encoded PCM data to send to the server in realtime. |
+
+<b>Returns:</b>
+
+Promise&lt;void&gt;
+
+#### Exceptions
+
+If this session has been closed.
+
+### Example
+
+```javascript
+// const pcmData = ... base64-encoded 16-bit PCM at 16kHz little-endian.
+const blob = { mimeType: "audio/pcm", data: pcmData };
+liveSession.sendAudioRealtime(blob);
+```
+
 ## LiveSession.sendFunctionResponses()
 
 > This API is provided as a preview for developers and may change based on feedback that we receive. Do not use this API in a production environment.

@@ -167,6 +209,11 @@ If this session has been closed.
 > This API is provided as a preview for developers and may change based on feedback that we receive. Do not use this API in a production environment.
 >
 
+> Warning: This API is now obsolete.
+>
+> Use `sendTextRealtime()`<!-- -->, `sendAudioRealtime()`<!-- -->, and `sendVideoRealtime()` instead.
+>
+
 Sends realtime input to the server.
 
 <b>Signature:</b>

@@ -194,7 +241,12 @@ If this session has been closed.
 > This API is provided as a preview for developers and may change based on feedback that we receive. Do not use this API in a production environment.
 >
 
-Sends a stream of [GenerativeContentBlob](./ai.generativecontentblob.md#generativecontentblob_interface)<!-- -->.
+> Warning: This API is now obsolete.
+>
+> Use `sendTextRealtime()`<!-- -->, `sendAudioRealtime()`<!-- -->, and `sendVideoRealtime()` instead.
+>
+> Sends a stream of [GenerativeContentBlob](./ai.generativecontentblob.md#generativecontentblob_interface)<!-- -->.
+>
 
 <b>Signature:</b>

@@ -216,3 +268,77 @@ Promise&lt;void&gt;
 
 If this session has been closed.
 
+## LiveSession.sendTextRealtime()
+
+> This API is provided as a preview for developers and may change based on feedback that we receive. Do not use this API in a production environment.
+>
+
+Sends text to the server in realtime.
+
+<b>Signature:</b>
+
+```typescript
+sendTextRealtime(text: string): Promise<void>;
+```
+
+#### Parameters
+
+| Parameter | Type | Description |
+| --- | --- | --- |
+| text | string | The text data to send. |
+
+<b>Returns:</b>
+
+Promise&lt;void&gt;
+
+#### Exceptions
+
+If this session has been closed.
+
+### Example
+
+```javascript
+liveSession.sendTextRealtime("Hello, how are you?");
+```
+
+## LiveSession.sendVideoRealtime()
+
+> This API is provided as a preview for developers and may change based on feedback that we receive. Do not use this API in a production environment.
+>
+
+Sends video data to the server in realtime.
+
+The server requires that the video is sent as individual video frames at 1 FPS. It is recommended to set `mimeType` to `image/jpeg`<!-- -->.
+
+<b>Signature:</b>
+
+```typescript
+sendVideoRealtime(blob: GenerativeContentBlob): Promise<void>;
+```
+
+#### Parameters
+
+| Parameter | Type | Description |
+| --- | --- | --- |
+| blob | [GenerativeContentBlob](./ai.generativecontentblob.md#generativecontentblob_interface) | The base64-encoded video data to send to the server in realtime. |
+
+<b>Returns:</b>
+
+Promise&lt;void&gt;
+
+#### Exceptions
+
+If this session has been closed.
+
+### Example
+
+```javascript
+// const videoFrame = ... base64-encoded JPEG data
+const blob = { mimeType: "image/jpeg", data: videoFrame };
+liveSession.sendVideoRealtime(blob);
+```

docs-devsite/ai.md (31 additions, 0 deletions)

@@ -162,6 +162,7 @@ The Firebase AI Web SDK.
 | [ImagenPersonFilterLevel](./ai.md#imagenpersonfilterlevel) | A filter level controlling whether generation of images containing people or faces is allowed.<!-- -->See the <a href="http://firebase.google.com/docs/vertex-ai/generate-images">personGeneration</a> documentation for more details. |
 | [ImagenSafetyFilterLevel](./ai.md#imagensafetyfilterlevel) | A filter level controlling how aggressively to filter sensitive content.<!-- -->Text prompts provided as inputs and images (generated or uploaded) through Imagen on Vertex AI are assessed against a list of safety filters, which include 'harmful categories' (for example, <code>violence</code>, <code>sexual</code>, <code>derogatory</code>, and <code>toxic</code>). This filter level controls how aggressively to filter out potentially harmful content from responses. See the [documentation](http://firebase.google.com/docs/vertex-ai/generate-images) and the [Responsible AI and usage guidelines](https://cloud.google.com/vertex-ai/generative-ai/docs/image/responsible-ai-imagen#safety-filters) for more details. |
 | [InferenceMode](./ai.md#inferencemode) | <b><i>(Public Preview)</i></b> Determines whether inference happens on-device or in-cloud. |
+| [InferenceSource](./ai.md#inferencesource) | <b><i>(Public Preview)</i></b> Indicates whether inference happened on-device or in-cloud. |
 | [Language](./ai.md#language) | <b><i>(Public Preview)</i></b> The programming language of the code. |
 | [LiveResponseType](./ai.md#liveresponsetype) | <b><i>(Public Preview)</i></b> The types of responses that can be returned by [LiveSession.receive()](./ai.livesession.md#livesessionreceive)<!-- -->. |
 | [Modality](./ai.md#modality) | Content part modality. |

@@ -189,6 +190,7 @@ The Firebase AI Web SDK.
 | [ImagenPersonFilterLevel](./ai.md#imagenpersonfilterlevel) | A filter level controlling whether generation of images containing people or faces is allowed.<!-- -->See the <a href="http://firebase.google.com/docs/vertex-ai/generate-images">personGeneration</a> documentation for more details. |
 | [ImagenSafetyFilterLevel](./ai.md#imagensafetyfilterlevel) | A filter level controlling how aggressively to filter sensitive content.<!-- -->Text prompts provided as inputs and images (generated or uploaded) through Imagen on Vertex AI are assessed against a list of safety filters, which include 'harmful categories' (for example, <code>violence</code>, <code>sexual</code>, <code>derogatory</code>, and <code>toxic</code>). This filter level controls how aggressively to filter out potentially harmful content from responses. See the [documentation](http://firebase.google.com/docs/vertex-ai/generate-images) and the [Responsible AI and usage guidelines](https://cloud.google.com/vertex-ai/generative-ai/docs/image/responsible-ai-imagen#safety-filters) for more details. |
 | [InferenceMode](./ai.md#inferencemode) | <b><i>(Public Preview)</i></b> Determines whether inference happens on-device or in-cloud. |
+| [InferenceSource](./ai.md#inferencesource) | <b><i>(Public Preview)</i></b> Indicates whether inference happened on-device or in-cloud. |
 | [Language](./ai.md#language) | <b><i>(Public Preview)</i></b> The programming language of the code. |
 | [LanguageModelMessageContentValue](./ai.md#languagemodelmessagecontentvalue) | <b><i>(Public Preview)</i></b> Content formats that can be provided as on-device message content. |
 | [LanguageModelMessageRole](./ai.md#languagemodelmessagerole) | <b><i>(Public Preview)</i></b> Allowable roles for on-device language model usage. |

@@ -643,6 +645,22 @@ InferenceMode: {
 }
 ```
 
+## InferenceSource
+
+> This API is provided as a preview for developers and may change based on feedback that we receive. Do not use this API in a production environment.
+>
+
+Indicates whether inference happened on-device or in-cloud.
+
+<b>Signature:</b>
+
+```typescript
+InferenceSource: {
+  readonly ON_DEVICE: "on_device";
+  readonly IN_CLOUD: "in_cloud";
+}
+```
+
 ## Language
 
 > This API is provided as a preview for developers and may change based on feedback that we receive. Do not use this API in a production environment.

@@ -926,6 +944,19 @@ Determines whether inference happens on-device or in-cloud.
 export type InferenceMode = (typeof InferenceMode)[keyof typeof InferenceMode];
 ```
 
+## InferenceSource
+
+> This API is provided as a preview for developers and may change based on feedback that we receive. Do not use this API in a production environment.
+>
+
+Indicates whether inference happened on-device or in-cloud.
+
+<b>Signature:</b>
+
+```typescript
+export type InferenceSource = (typeof InferenceSource)[keyof typeof InferenceSource];
+```
+
 ## Language
 
 > This API is provided as a preview for developers and may change based on feedback that we receive. Do not use this API in a production environment.

packages/ai/integration/live.test.ts (39 additions, 0 deletions)

@@ -154,6 +154,45 @@ describe('Live', function () {
     });
   });
 
+  describe('sendTextRealtime()', () => {
+    it('should send a single text chunk and receive a response', async () => {
+      const model = getLiveGenerativeModel(testConfig.ai, {
+        model: testConfig.model,
+        generationConfig: textLiveGenerationConfig
+      });
+      const session = await model.connect();
+      const responsePromise = nextTurnText(session.receive());
+
+      await session.sendTextRealtime('Are you an AI? Yes or No.');
+
+      const responseText = await responsePromise;
+      expect(responseText).to.include('Yes');
+
+      await session.close();
+    });
+  });
+
+  describe('sendAudioRealtime()', () => {
+    it('should send a single audio chunk and receive a response', async () => {
+      const model = getLiveGenerativeModel(testConfig.ai, {
+        model: testConfig.model,
+        generationConfig: textLiveGenerationConfig
+      });
+      const session = await model.connect();
+      const responsePromise = nextTurnText(session.receive());
+
+      await session.sendAudioRealtime({
+        data: HELLO_AUDIO_PCM_BASE64, // "Hey, can you hear me?"
+        mimeType: 'audio/pcm'
+      });
+
+      const responseText = await responsePromise;
+      expect(responseText).to.include('Yes');
+
+      await session.close();
+    });
+  });
+
   describe('sendMediaChunks()', () => {
     it('should send a single audio chunk and receive a response', async () => {
       const model = getLiveGenerativeModel(testConfig.ai, {
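
This hunk adds integration coverage for the text and audio methods only. A matching test for `sendVideoRealtime()` is not part of the diff, but a sketch following the same pattern might look like this (the `HELLO_VIDEO_FRAME_JPEG_BASE64` fixture is hypothetical):

```typescript
describe('sendVideoRealtime()', () => {
  it('should send a single video frame and receive a response', async () => {
    const model = getLiveGenerativeModel(testConfig.ai, {
      model: testConfig.model,
      generationConfig: textLiveGenerationConfig
    });
    const session = await model.connect();
    const responsePromise = nextTurnText(session.receive());

    // Hypothetical fixture: one base64-encoded JPEG frame, per the 1 FPS guidance.
    await session.sendVideoRealtime({
      data: HELLO_VIDEO_FRAME_JPEG_BASE64,
      mimeType: 'image/jpeg'
    });

    const responseText = await responsePromise;
    expect(responseText).to.exist;

    await session.close();
  });
});
```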
