Skip to content

Commit 7b6767d

Browse files
Merge pull request #13784 from Snuffleupagus/issue-13783
When parsing corrupt documents, avoid inserting obviously broken data in the XRef-table (issue 13783)
2 parents: 687cfce + b82c802; commit: 7b6767d

File tree

5 files changed

+51
-10
lines changed

5 files changed

+51
-10
lines changed

src/core/core_utils.js

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -58,6 +58,8 @@ class MissingDataException extends BaseException {
5858
}
5959
}
6060

61+
class ParserEOFException extends BaseException {}
62+
6163
class XRefEntryException extends BaseException {}
6264

6365
class XRefParseException extends BaseException {}
@@ -450,6 +452,7 @@ export {
450452
isWhiteSpace,
451453
log2,
452454
MissingDataException,
455+
ParserEOFException,
453456
parseXFAPath,
454457
readInt8,
455458
readUint16,

src/core/parser.js

Lines changed: 11 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -33,7 +33,11 @@ import {
3333
Name,
3434
Ref,
3535
} from "./primitives.js";
36-
import { isWhiteSpace, MissingDataException } from "./core_utils.js";
36+
import {
37+
isWhiteSpace,
38+
MissingDataException,
39+
ParserEOFException,
40+
} from "./core_utils.js";
3741
import { Ascii85Stream } from "./ascii_85_stream.js";
3842
import { AsciiHexStream } from "./ascii_hex_stream.js";
3943
import { CCITTFaxStream } from "./ccitt_stream.js";
@@ -124,10 +128,10 @@ class Parser {
124128
array.push(this.getObj(cipherTransform));
125129
}
126130
if (isEOF(this.buf1)) {
127-
if (!this.recoveryMode) {
128-
throw new FormatError("End of file inside array");
131+
if (this.recoveryMode) {
132+
return array;
129133
}
130-
return array;
134+
throw new ParserEOFException("End of file inside array.");
131135
}
132136
this.shift();
133137
return array;
@@ -148,10 +152,10 @@ class Parser {
148152
dict.set(key, this.getObj(cipherTransform));
149153
}
150154
if (isEOF(this.buf1)) {
151-
if (!this.recoveryMode) {
152-
throw new FormatError("End of file inside dictionary");
155+
if (this.recoveryMode) {
156+
return dict;
153157
}
154-
return dict;
158+
throw new ParserEOFException("End of file inside dictionary.");
155159
}
156160

157161
// Stream objects are not allowed inside content streams or

src/core/xref.js

Lines changed: 27 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -33,6 +33,7 @@ import {
3333
import { Lexer, Parser } from "./parser.js";
3434
import {
3535
MissingDataException,
36+
ParserEOFException,
3637
XRefEntryException,
3738
XRefParseException,
3839
} from "./core_utils.js";
@@ -453,15 +454,38 @@ class XRef {
453454
} else if ((m = objRegExp.exec(token))) {
454455
const num = m[1] | 0,
455456
gen = m[2] | 0;
456-
if (!this.entries[num] || this.entries[num].gen === gen) {
457+
458+
let contentLength,
459+
startPos = position + token.length,
460+
updateEntries = false;
461+
if (!this.entries[num]) {
462+
updateEntries = true;
463+
} else if (this.entries[num].gen === gen) {
464+
// Before overwriting an existing entry, ensure that the new one won't
465+
// cause *immediate* errors when it's accessed (fixes issue13783.pdf).
466+
try {
467+
const parser = new Parser({
468+
lexer: new Lexer(stream.makeSubStream(startPos)),
469+
});
470+
parser.getObj();
471+
updateEntries = true;
472+
} catch (ex) {
473+
if (ex instanceof ParserEOFException) {
474+
warn(`indexObjects -- checking object (${token}): "${ex}".`);
475+
} else {
476+
// The error may come from the `Parser`-instance being initialized
477+
// without an `XRef`-instance (we don't have a usable one yet).
478+
updateEntries = true;
479+
}
480+
}
481+
}
482+
if (updateEntries) {
457483
this.entries[num] = {
458484
offset: position - stream.start,
459485
gen,
460486
uncompressed: true,
461487
};
462488
}
463-
let contentLength,
464-
startPos = position + token.length;
465489

466490
// Find the next "obj" string, rather than "endobj", to ensure that
467491
// we won't skip over a new 'obj' operator in corrupt files where

test/pdfs/issue13783.pdf.link

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1 @@
1+
https://github.com/mozilla/pdf.js/files/6869824/TimeTravel.pdf

test/test_manifest.json

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1382,6 +1382,15 @@
13821382
"enableXfa": true,
13831383
"type": "eq"
13841384
},
1385+
{ "id": "issue13783",
1386+
"file": "pdfs/issue13783.pdf",
1387+
"md5": "6958d827afa566efbd82f53271ea5cd6",
1388+
"link": true,
1389+
"rounds": 1,
1390+
"firstPage": 7,
1391+
"lastPage": 7,
1392+
"type": "eq"
1393+
},
13851394
{ "id": "issue9262",
13861395
"file": "pdfs/issue9262_reduced.pdf",
13871396
"md5": "5347ce2d7b3866625c22e115fd90e0de",

0 commit comments

Comments
 (0)