Skip to content

Commit 05181e2

Browse files
authored
transcript store enhancements (#1879)
* feat(transcript): preserve and reindex partial hints after filter

  Update transcript listener to correctly handle partial words and their associated hints when older partial words are removed due to final segments. Replace inline filtering logic with a two-step approach:
  - collect existing partial words into a variable and filter them into remainingPartialWords
  - build a mapping from old word indices to new indices for the kept partial words, then filter and remap partialHints accordingly

  This prevents mismatches where hints still point to removed word indices and ensures hints reference the correct remaining partial word after filtering. Add a unit test that simulates partial then final responses and asserts that remaining partial words and hints are consistent and correctly reindexed.

* Make partial speaker hints channel-aware

  Avoid cross-channel hint mis-assignment by scoping partial speaker hints to their channel. The change renames partialHints to partialHintsByChannel: a Record<number, RuntimeSpeakerHint[]>, updates initial state and all reads/writes to use partialHintsByChannel[channelIndex], and adjusts filtering/remapping and reset logic to operate on per-channel hint arrays. Tests and a component consumer were updated to read from partialHintsByChannel as well.

* Fix hint wordIndex alignment across channels

  Flatten remainingWords once and reindex per-channel hints so RuntimeSpeakerHint.wordIndex refers to positions in the flattened array. Previously hints were flattened independently and kept per-channel indices, which misaligned hints for channels >0. This change computes offsets for each channel by accumulating prior partialWords lengths, adjusts each hint.wordIndex by its channel offset, and passes the flattened words and reindexed hints to handlePersist.
* Reindex flattened speaker hints by cumulative word offsets

  Flattening partial hints across channels left each hint.wordIndex relative to its original channel, producing incorrect indices when channels are concatenated. Compute cumulative word offsets for each channel (summing the lengths of prior channels' partialWords) and reindex each RuntimeSpeakerHint by adding the channel's offset. Also import the RuntimeSpeakerHint type so reindexed hints have the correct shape.
1 parent 09824ef commit 05181e2

File tree

3 files changed

+179
-25
lines changed

3 files changed

+179
-25
lines changed

apps/desktop/src/components/main/body/sessions/note-input/transcript/shared/index.tsx

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import { cn } from "@hypr/utils";
44

55
import { useListener } from "../../../../../../../contexts/listener";
66
import * as main from "../../../../../../../store/tinybase/main";
7+
import type { RuntimeSpeakerHint } from "../../../../../../../utils/segment";
78
import { useAutoScroll, useScrollDetection } from "./hooks";
89
import { Operations } from "./operations";
910
import { RenderTranscript } from "./render-transcript";
@@ -32,7 +33,32 @@ export function TranscriptContainer({
3233
const partialWords = useListener((state) =>
3334
Object.values(state.partialWordsByChannel).flat(),
3435
);
35-
const partialHints = useListener((state) => state.partialHints);
36+
const partialHints = useListener((state) => {
37+
const channelIndices = Object.keys(state.partialWordsByChannel)
38+
.map(Number)
39+
.sort((a, b) => a - b);
40+
41+
const offsetByChannel = new Map<number, number>();
42+
let currentOffset = 0;
43+
for (const channelIndex of channelIndices) {
44+
offsetByChannel.set(channelIndex, currentOffset);
45+
currentOffset += state.partialWordsByChannel[channelIndex]?.length ?? 0;
46+
}
47+
48+
const reindexedHints: RuntimeSpeakerHint[] = [];
49+
for (const channelIndex of channelIndices) {
50+
const hints = state.partialHintsByChannel[channelIndex] ?? [];
51+
const offset = offsetByChannel.get(channelIndex) ?? 0;
52+
for (const hint of hints) {
53+
reindexedHints.push({
54+
...hint,
55+
wordIndex: hint.wordIndex + offset,
56+
});
57+
}
58+
}
59+
60+
return reindexedHints;
61+
});
3662

3763
const containerRef = useRef<HTMLDivElement>(null);
3864
const [scrollElement, setScrollElement] = useState<HTMLDivElement | null>(

apps/desktop/src/store/zustand/listener/transcript.test.ts

Lines changed: 93 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -101,9 +101,9 @@ describe("transcript slice", () => {
101101
" Another",
102102
" problem",
103103
]);
104-
expect(stateAfterFirst.partialHints).toHaveLength(2);
105-
expect(stateAfterFirst.partialHints[0]?.wordIndex).toBe(0);
106-
expect(stateAfterFirst.partialHints[1]?.wordIndex).toBe(1);
104+
expect(stateAfterFirst.partialHintsByChannel[0]).toHaveLength(2);
105+
expect(stateAfterFirst.partialHintsByChannel[0]?.[0]?.wordIndex).toBe(0);
106+
expect(stateAfterFirst.partialHintsByChannel[0]?.[1]?.wordIndex).toBe(1);
107107

108108
const extendedPartial = createResponse({
109109
words: [
@@ -132,9 +132,9 @@ describe("transcript slice", () => {
132132
" problem",
133133
" exists",
134134
]);
135-
expect(stateAfterSecond.partialHints).toHaveLength(3);
136-
const lastPartialHint =
137-
stateAfterSecond.partialHints[stateAfterSecond.partialHints.length - 1];
135+
const channelHints = stateAfterSecond.partialHintsByChannel[0] ?? [];
136+
expect(channelHints).toHaveLength(3);
137+
const lastPartialHint = channelHints[channelHints.length - 1];
138138
expect(lastPartialHint?.wordIndex).toBe(2);
139139
});
140140

@@ -187,4 +187,91 @@ describe("transcript slice", () => {
187187
expect(persist).toHaveBeenCalledTimes(1);
188188
expect(store.getState().finalWordsMaxEndMsByChannel[0]).toBe(1500);
189189
});
190+
191+
test("adjusts partial hint indices after filtering partial words", () => {
192+
const persist = vi.fn();
193+
store.getState().setTranscriptPersist(persist);
194+
195+
const partialResponse = createResponse({
196+
words: [
197+
{
198+
word: "hello",
199+
punctuated_word: "Hello",
200+
start: 0,
201+
end: 0.5,
202+
confidence: 1,
203+
speaker: 0,
204+
language: "en",
205+
},
206+
{
207+
word: "world",
208+
punctuated_word: "world",
209+
start: 0.5,
210+
end: 1.0,
211+
confidence: 1,
212+
speaker: 1,
213+
language: "en",
214+
},
215+
{
216+
word: "test",
217+
punctuated_word: "test",
218+
start: 1.1,
219+
end: 1.5,
220+
confidence: 1,
221+
speaker: 0,
222+
language: "en",
223+
},
224+
],
225+
transcript: "Hello world test",
226+
isFinal: false,
227+
});
228+
229+
store.getState().handleTranscriptResponse(partialResponse);
230+
231+
const stateAfterPartial = store.getState();
232+
expect(stateAfterPartial.partialWordsByChannel[0]).toHaveLength(3);
233+
expect(stateAfterPartial.partialHintsByChannel[0]).toHaveLength(3);
234+
235+
const finalResponse = createResponse({
236+
words: [
237+
{
238+
word: "hello",
239+
punctuated_word: "Hello",
240+
start: 0,
241+
end: 0.5,
242+
confidence: 1,
243+
speaker: 0,
244+
language: "en",
245+
},
246+
{
247+
word: "world",
248+
punctuated_word: "world",
249+
start: 0.5,
250+
end: 1.0,
251+
confidence: 1,
252+
speaker: 1,
253+
language: "en",
254+
},
255+
],
256+
transcript: "Hello world",
257+
isFinal: true,
258+
});
259+
260+
store.getState().handleTranscriptResponse(finalResponse);
261+
262+
const stateAfterFinal = store.getState();
263+
const remainingPartialWords = stateAfterFinal.partialWordsByChannel[0];
264+
const remainingHints = stateAfterFinal.partialHintsByChannel[0] ?? [];
265+
266+
expect(remainingPartialWords).toHaveLength(1);
267+
expect(remainingPartialWords?.[0]?.text).toBe(" test");
268+
269+
expect(remainingHints).toHaveLength(1);
270+
expect(remainingHints[0]?.wordIndex).toBe(0);
271+
272+
const hintedWord =
273+
remainingPartialWords?.[remainingHints[0]?.wordIndex ?? -1];
274+
expect(hintedWord).toBeDefined();
275+
expect(hintedWord?.text).toBe(" test");
276+
});
190277
});

apps/desktop/src/store/zustand/listener/transcript.ts

Lines changed: 59 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ export type HandlePersistCallback = (
1616
export type TranscriptState = {
1717
finalWordsMaxEndMsByChannel: Record<number, number>;
1818
partialWordsByChannel: WordsByChannel;
19-
partialHints: RuntimeSpeakerHint[];
19+
partialHintsByChannel: Record<number, RuntimeSpeakerHint[]>;
2020
handlePersist?: HandlePersistCallback;
2121
};
2222

@@ -29,7 +29,7 @@ export type TranscriptActions = {
2929
const initialState: TranscriptState = {
3030
finalWordsMaxEndMsByChannel: {},
3131
partialWordsByChannel: {},
32-
partialHints: [],
32+
partialHintsByChannel: {},
3333
handlePersist: undefined,
3434
};
3535

@@ -46,7 +46,7 @@ export const createTranscriptSlice = <
4646
): void => {
4747
const {
4848
partialWordsByChannel,
49-
partialHints,
49+
partialHintsByChannel,
5050
handlePersist,
5151
finalWordsMaxEndMsByChannel,
5252
} = get();
@@ -69,20 +69,32 @@ export const createTranscriptSlice = <
6969
wordIndex: hint.wordIndex - firstNewWordIndex,
7070
}));
7171

72-
const remainingPartialWords = (
73-
partialWordsByChannel[channelIndex] ?? []
74-
).filter((word) => word.start_ms > lastEndMs);
72+
const existingPartialWords = partialWordsByChannel[channelIndex] ?? [];
73+
const remainingPartialWords = existingPartialWords.filter(
74+
(word) => word.start_ms > lastEndMs,
75+
);
7576

76-
const remainingPartialHints = partialHints.filter((hint) => {
77-
const partialWords = partialWordsByChannel[channelIndex] ?? [];
78-
const word = partialWords[hint.wordIndex];
79-
return word && word.start_ms > lastEndMs;
80-
});
77+
const oldToNewIndex = new Map<number, number>();
78+
let newIdx = 0;
79+
for (let oldIdx = 0; oldIdx < existingPartialWords.length; oldIdx++) {
80+
if (existingPartialWords[oldIdx].start_ms > lastEndMs) {
81+
oldToNewIndex.set(oldIdx, newIdx);
82+
newIdx++;
83+
}
84+
}
85+
86+
const existingPartialHints = partialHintsByChannel[channelIndex] ?? [];
87+
const remainingPartialHints = existingPartialHints
88+
.filter((hint) => oldToNewIndex.has(hint.wordIndex))
89+
.map((hint) => ({
90+
...hint,
91+
wordIndex: oldToNewIndex.get(hint.wordIndex)!,
92+
}));
8193

8294
set((state) =>
8395
mutate(state, (draft) => {
8496
draft.partialWordsByChannel[channelIndex] = remainingPartialWords;
85-
draft.partialHints = remainingPartialHints;
97+
draft.partialHintsByChannel[channelIndex] = remainingPartialHints;
8698
draft.finalWordsMaxEndMsByChannel[channelIndex] = lastEndMs;
8799
}),
88100
);
@@ -95,7 +107,7 @@ export const createTranscriptSlice = <
95107
words: WordLike[],
96108
hints: RuntimeSpeakerHint[],
97109
): void => {
98-
const { partialWordsByChannel, partialHints } = get();
110+
const { partialWordsByChannel, partialHintsByChannel } = get();
99111
const existing = partialWordsByChannel[channelIndex] ?? [];
100112

101113
const firstStartMs = getFirstStartMs(words);
@@ -113,7 +125,8 @@ export const createTranscriptSlice = <
113125
wordIndex: before.length + hint.wordIndex,
114126
}));
115127

116-
const filteredOldHints = partialHints.filter((hint) => {
128+
const existingHints = partialHintsByChannel[channelIndex] ?? [];
129+
const filteredOldHints = existingHints.filter((hint) => {
117130
const word = existing[hint.wordIndex];
118131
return (
119132
word && (word.end_ms <= firstStartMs || word.start_ms >= lastEndMs)
@@ -123,7 +136,10 @@ export const createTranscriptSlice = <
123136
set((state) =>
124137
mutate(state, (draft) => {
125138
draft.partialWordsByChannel[channelIndex] = newWords;
126-
draft.partialHints = [...filteredOldHints, ...hintsWithAdjustedIndices];
139+
draft.partialHintsByChannel[channelIndex] = [
140+
...filteredOldHints,
141+
...hintsWithAdjustedIndices,
142+
];
127143
}),
128144
);
129145
};
@@ -160,17 +176,42 @@ export const createTranscriptSlice = <
160176
}
161177
},
162178
resetTranscript: () => {
163-
const { partialWordsByChannel, partialHints, handlePersist } = get();
179+
const { partialWordsByChannel, partialHintsByChannel, handlePersist } =
180+
get();
164181

165182
const remainingWords = Object.values(partialWordsByChannel).flat();
183+
184+
const channelIndices = Object.keys(partialWordsByChannel)
185+
.map(Number)
186+
.sort((a, b) => a - b);
187+
188+
const offsetByChannel = new Map<number, number>();
189+
let currentOffset = 0;
190+
for (const channelIndex of channelIndices) {
191+
offsetByChannel.set(channelIndex, currentOffset);
192+
currentOffset += partialWordsByChannel[channelIndex]?.length ?? 0;
193+
}
194+
195+
const remainingHints: RuntimeSpeakerHint[] = [];
196+
for (const channelIndex of channelIndices) {
197+
const hints = partialHintsByChannel[channelIndex] ?? [];
198+
const offset = offsetByChannel.get(channelIndex) ?? 0;
199+
for (const hint of hints) {
200+
remainingHints.push({
201+
...hint,
202+
wordIndex: hint.wordIndex + offset,
203+
});
204+
}
205+
}
206+
166207
if (remainingWords.length > 0) {
167-
handlePersist?.(remainingWords, partialHints);
208+
handlePersist?.(remainingWords, remainingHints);
168209
}
169210

170211
set((state) =>
171212
mutate(state, (draft) => {
172213
draft.partialWordsByChannel = {};
173-
draft.partialHints = [];
214+
draft.partialHintsByChannel = {};
174215
draft.finalWordsMaxEndMsByChannel = {};
175216
draft.handlePersist = undefined;
176217
}),

0 commit comments

Comments
 (0)