Skip to content

Commit 4f15b88

Browse files
fix(lightonocr): fix test failures for vocab_size access and device placement #0
- Use `config.text_config.vocab_size` instead of `config.vocab_size`, since the composite config stores the vocabulary size on the text sub-config.
- Remove explicit device placement from the `attention_mask` and `image_sizes` tensors.
- Allow `device_map="auto"` to handle device placement in the model-parallelism tests.
1 parent 1b9ea63 commit 4f15b88

File tree

1 file changed

+7 additions
-7 deletions

tests/models/lightonocr/test_modeling_lightonocr.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -156,11 +156,11 @@ def prepare_config_and_inputs_for_common(self):
156156
# Place image tokens at the beginning
157157
input_ids[:, : self.num_image_tokens] = config.image_token_id
158158

159-
attention_mask = input_ids.ne(self.pad_token_id).to(torch_device)
159+
attention_mask = input_ids.ne(self.pad_token_id)
160160

161161
# Create image_sizes as tensor - must match batch size
162162
image_sizes = torch.tensor(
163-
[[self.image_size, self.image_size]] * self.batch_size, dtype=torch.long, device=torch_device
163+
[[self.image_size, self.image_size]] * self.batch_size, dtype=torch.long
164164
)
165165

166166
inputs_dict = {
@@ -198,11 +198,11 @@ def prepare_config_and_inputs_for_generate(self, batch_size=None):
198198
# Place image tokens at the beginning
199199
input_ids[:, : self.num_image_tokens] = config.image_token_id
200200

201-
attention_mask = input_ids.ne(self.pad_token_id).to(torch_device)
201+
attention_mask = input_ids.ne(self.pad_token_id)
202202

203203
# Create image_sizes as tensor - must match batch size
204204
image_sizes = torch.tensor(
205-
[[self.image_size, self.image_size]] * batch_size, dtype=torch.long, device=torch_device
205+
[[self.image_size, self.image_size]] * batch_size, dtype=torch.long
206206
)
207207

208208
inputs_dict = {
@@ -612,7 +612,7 @@ def test_model_can_generate_without_images(self):
612612
model.eval()
613613

614614
# Create text-only input
615-
input_ids = torch.randint(0, config.vocab_size - 1, (1, 10), device=torch_device) + 1
615+
input_ids = torch.randint(0, config.text_config.vocab_size - 1, (1, 10), device=torch_device) + 1
616616

617617
with torch.no_grad():
618618
outputs = model.generate(input_ids=input_ids, max_new_tokens=5)
@@ -660,7 +660,7 @@ def test_model_forward_with_images(self):
660660
num_image_tokens = num_patches // (config.spatial_merge_size**2)
661661

662662
seq_len = num_image_tokens + 10
663-
input_ids = torch.randint(0, config.vocab_size - 1, (batch_size, seq_len), device=torch_device) + 1
663+
input_ids = torch.randint(0, config.text_config.vocab_size - 1, (batch_size, seq_len), device=torch_device) + 1
664664
# Ensure no tokens accidentally equal image_token_id
665665
input_ids[input_ids == config.image_token_id] = config.image_token_id + 1
666666
# Now place image tokens at the beginning
@@ -677,4 +677,4 @@ def test_model_forward_with_images(self):
677677
self.assertIsNotNone(outputs.logits)
678678
self.assertEqual(outputs.logits.shape[0], batch_size)
679679
self.assertEqual(outputs.logits.shape[1], seq_len)
680-
self.assertEqual(outputs.logits.shape[2], config.vocab_size)
680+
self.assertEqual(outputs.logits.shape[2], config.text_config.vocab_size)

0 commit comments

Comments (0)