Skip to content

Commit eee20cf

Browse files
committed
Fix the regex string used to find the chars to normalize with NFKC when searching
ICU has been updated in Firefox (see https://bugzilla.mozilla.org/show_bug.cgi?id=2000225). The char `0xA7F1` is now alphabetic and can be normalized to an "S".
1 parent ec71e4e commit eee20cf

File tree

1 file changed

+27
-3
lines changed

1 file changed

+27
-3
lines changed

web/pdf_find_utils.js

Lines changed: 27 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@
1313
* limitations under the License.
1414
*/
1515

16+
import { FeatureTest } from "pdfjs-lib";
17+
1618
const CharacterType = {
1719
SPACE: 0,
1820
ALPHA_LETTER: 1,
@@ -114,8 +116,13 @@ function getCharacterType(charCode) {
114116

115117
let NormalizeWithNFKC;
116118
function getNormalizeWithNFKC() {
117-
/* eslint-disable no-irregular-whitespace */
118-
NormalizeWithNFKC ||= ` ¨ª¯²-µ¸-º¼-¾IJ-ijĿ-ŀʼnſDŽ-njDZ-dzʰ-ʸ˘-˝ˠ-ˤʹͺ;΄-΅·ϐ-ϖϰ-ϲϴ-ϵϹևٵ-ٸक़-य़ড়-ঢ়য়ਲ਼ਸ਼ਖ਼-ਜ਼ਫ਼ଡ଼-ଢ଼ำຳໜ-ໝ༌གྷཌྷདྷབྷཛྷཀྵჼᴬ-ᴮᴰ-ᴺᴼ-ᵍᵏ-ᵪᵸᶛ-ᶿẚ-ẛάέήίόύώΆ᾽-῁ΈΉ῍-῏ΐΊ῝-῟ΰΎ῭-`ΌΏ´-῾ - ‑‗․-… ″-‴‶-‷‼‾⁇-⁉⁗ ⁰-ⁱ⁴-₎ₐ-ₜ₨℀-℃℅-ℇ℉-ℓℕ-№ℙ-ℝ℠-™ℤΩℨK-ℭℯ-ℱℳ-ℹ℻-⅀ⅅ-ⅉ⅐-ⅿ↉∬-∭∯-∰〈-〉①-⓪⨌⩴-⩶⫝̸ⱼ-ⱽⵯ⺟⻳⼀-⿕ 〶〸-〺゛-゜ゟヿㄱ-ㆎ㆒-㆟㈀-㈞㈠-㉇㉐-㉾㊀-㏿ꚜ-ꚝꝰꟲ-ꟴꟸ-ꟹꭜ-ꭟꭩ豈-嗀塚晴凞-羽蘒諸逸-都飯-舘並-龎ff-stﬓ-ﬗיִײַ-זּטּ-לּמּנּ-סּףּ-פּצּ-ﮱﯓ-ﴽﵐ-ﶏﶒ-ﷇﷰ-﷼︐-︙︰-﹄﹇-﹒﹔-﹦﹨-﹫ﹰ-ﹲﹴﹶ-ﻼ!-하-ᅦᅧ-ᅬᅭ-ᅲᅳ-ᅵ¢-₩`;
119+
if (
120+
(typeof PDFJSDev === "undefined" && FeatureTest.platform.isFirefox) ||
121+
(typeof PDFJSDev !== "undefined" && PDFJSDev.test("MOZCENTRAL"))
122+
) {
123+
/* eslint-disable no-irregular-whitespace */
124+
NormalizeWithNFKC ||= `\xA0¨ª¯²-µ¸-º¼-¾IJ-ijĿ-ŀʼnſDŽ-njDZ-dzʰ-ʸ˘-˝ˠ-ˤʹͺ;΄-΅·ϐ-ϖϰ-ϲϴ-ϵϹևٵ-ٸक़-य़ড়-ঢ়য়ਲ਼ਸ਼ਖ਼-ਜ਼ਫ਼ଡ଼-ଢ଼ำຳໜ-ໝ༌གྷཌྷདྷབྷཛྷཀྵჼᴬ-ᴮᴰ-ᴺᴼ-ᵍᵏ-ᵪᵸᶛ-ᶿẚ-ẛάέήίόύώΆ᾽-῁ΈΉ῍-῏ΐΊ῝-῟ΰΎ῭-`ΌΏ´-῾ - ‑‗․-… ″-‴‶-‷‼‾⁇-⁉⁗ ⁰-ⁱ⁴-₎ₐ-ₜ₨℀-℃℅-ℇ℉-ℓℕ-№ℙ-ℝ℠-™ℤΩℨK-ℭℯ-ℱℳ-ℹ℻-⅀ⅅ-ⅉ⅐-ⅿ↉∬-∭∯-∰〈-〉①-⓪⨌⩴-⩶⫝̸ⱼ-ⱽⵯ⺟⻳⼀-⿕ 〶〸-〺゛-゜ゟヿㄱ-ㆎ㆒-㆟㈀-㈞㈠-㉇㉐-㉾㊀-㏿ꚜ-ꚝꝰ꟱-ꟴꟸ-ꟹꭜ-ꭟꭩ豈-嗀塚晴凞-羽蘒諸逸-都飯-舘並-龎ff-stﬓ-ﬗיִײַ-זּטּ-לּמּנּ-סּףּ-פּצּ-ﮱﯓ-ﴽﵐ-ﶏﶒ-ﷇﷰ-﷼︐-︙︰-﹄﹇-﹒﹔-﹦﹨-﹫ﹰ-ﹲﹴﹶ-ﻼ!-하-ᅦᅧ-ᅬᅭ-ᅲᅳ-ᅵ¢-₩`;
125+
}
119126

120127
if (typeof PDFJSDev === "undefined" || PDFJSDev.test("TESTING")) {
121128
const ranges = [];
@@ -145,7 +152,24 @@ function getNormalizeWithNFKC() {
145152
}
146153
}
147154
}
148-
if (ranges.join("") !== NormalizeWithNFKC) {
155+
156+
const rangesStr = ranges.join("");
157+
if (!NormalizeWithNFKC) {
158+
NormalizeWithNFKC = rangesStr;
159+
} else if (rangesStr !== NormalizeWithNFKC) {
160+
for (let i = 1; i < rangesStr.length; i++) {
161+
if (rangesStr[i] !== NormalizeWithNFKC[i]) {
162+
console.log(
163+
`Difference at index ${i}: ` +
164+
`U+${rangesStr.charCodeAt(i).toString(16).toUpperCase().padStart(4, "0")}` +
165+
`!== U+${NormalizeWithNFKC.charCodeAt(i)
166+
.toString(16)
167+
.toUpperCase()
168+
.padStart(4, "0")}`
169+
);
170+
break;
171+
}
172+
}
149173
throw new Error(
150174
"getNormalizeWithNFKC - update the `NormalizeWithNFKC` string."
151175
);

0 commit comments

Comments
 (0)