Skip to content

Commit ec5330f

Browse files
authored
Merge pull request #20465 from calixteman/update_char_to_nfkc
Fix the regex string used to find the chars to normalize with NFKC when searching
2 parents 925fc3d + eee20cf commit ec5330f

File tree

1 file changed

+27
-3
lines changed

1 file changed

+27
-3
lines changed

web/pdf_find_utils.js

Lines changed: 27 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,8 @@
1313
* limitations under the License.
1414
*/
1515

16+
import { FeatureTest } from "pdfjs-lib";
17+
1618
const CharacterType = {
1719
SPACE: 0,
1820
ALPHA_LETTER: 1,
@@ -114,8 +116,13 @@ function getCharacterType(charCode) {
114116

115117
let NormalizeWithNFKC;
116118
function getNormalizeWithNFKC() {
117-
/* eslint-disable no-irregular-whitespace */
118-
NormalizeWithNFKC ||= ` ¨ª¯²-µ¸-º¼-¾IJ-ijĿ-ŀʼnſDŽ-njDZ-dzʰ-ʸ˘-˝ˠ-ˤʹͺ;΄-΅·ϐ-ϖϰ-ϲϴ-ϵϹևٵ-ٸक़-य़ড়-ঢ়য়ਲ਼ਸ਼ਖ਼-ਜ਼ਫ਼ଡ଼-ଢ଼ำຳໜ-ໝ༌གྷཌྷདྷབྷཛྷཀྵჼᴬ-ᴮᴰ-ᴺᴼ-ᵍᵏ-ᵪᵸᶛ-ᶿẚ-ẛάέήίόύώΆ᾽-῁ΈΉ῍-῏ΐΊ῝-῟ΰΎ῭-`ΌΏ´-῾ - ‑‗․-… ″-‴‶-‷‼‾⁇-⁉⁗ ⁰-ⁱ⁴-₎ₐ-ₜ₨℀-℃℅-ℇ℉-ℓℕ-№ℙ-ℝ℠-™ℤΩℨK-ℭℯ-ℱℳ-ℹ℻-⅀ⅅ-ⅉ⅐-ⅿ↉∬-∭∯-∰〈-〉①-⓪⨌⩴-⩶⫝̸ⱼ-ⱽⵯ⺟⻳⼀-⿕ 〶〸-〺゛-゜ゟヿㄱ-ㆎ㆒-㆟㈀-㈞㈠-㉇㉐-㉾㊀-㏿ꚜ-ꚝꝰꟲ-ꟴꟸ-ꟹꭜ-ꭟꭩ豈-嗀塚晴凞-羽蘒諸逸-都飯-舘並-龎ff-stﬓ-ﬗיִײַ-זּטּ-לּמּנּ-סּףּ-פּצּ-ﮱﯓ-ﴽﵐ-ﶏﶒ-ﷇﷰ-﷼︐-︙︰-﹄﹇-﹒﹔-﹦﹨-﹫ﹰ-ﹲﹴﹶ-ﻼ!-하-ᅦᅧ-ᅬᅭ-ᅲᅳ-ᅵ¢-₩`;
119+
if (
120+
(typeof PDFJSDev === "undefined" && FeatureTest.platform.isFirefox) ||
121+
(typeof PDFJSDev !== "undefined" && PDFJSDev.test("MOZCENTRAL"))
122+
) {
123+
/* eslint-disable no-irregular-whitespace */
124+
NormalizeWithNFKC ||= `\xA0¨ª¯²-µ¸-º¼-¾IJ-ijĿ-ŀʼnſDŽ-njDZ-dzʰ-ʸ˘-˝ˠ-ˤʹͺ;΄-΅·ϐ-ϖϰ-ϲϴ-ϵϹևٵ-ٸक़-य़ড়-ঢ়য়ਲ਼ਸ਼ਖ਼-ਜ਼ਫ਼ଡ଼-ଢ଼ำຳໜ-ໝ༌གྷཌྷདྷབྷཛྷཀྵჼᴬ-ᴮᴰ-ᴺᴼ-ᵍᵏ-ᵪᵸᶛ-ᶿẚ-ẛάέήίόύώΆ᾽-῁ΈΉ῍-῏ΐΊ῝-῟ΰΎ῭-`ΌΏ´-῾ - ‑‗․-… ″-‴‶-‷‼‾⁇-⁉⁗ ⁰-ⁱ⁴-₎ₐ-ₜ₨℀-℃℅-ℇ℉-ℓℕ-№ℙ-ℝ℠-™ℤΩℨK-ℭℯ-ℱℳ-ℹ℻-⅀ⅅ-ⅉ⅐-ⅿ↉∬-∭∯-∰〈-〉①-⓪⨌⩴-⩶⫝̸ⱼ-ⱽⵯ⺟⻳⼀-⿕ 〶〸-〺゛-゜ゟヿㄱ-ㆎ㆒-㆟㈀-㈞㈠-㉇㉐-㉾㊀-㏿ꚜ-ꚝꝰ꟱-ꟴꟸ-ꟹꭜ-ꭟꭩ豈-嗀塚晴凞-羽蘒諸逸-都飯-舘並-龎ff-stﬓ-ﬗיִײַ-זּטּ-לּמּנּ-סּףּ-פּצּ-ﮱﯓ-ﴽﵐ-ﶏﶒ-ﷇﷰ-﷼︐-︙︰-﹄﹇-﹒﹔-﹦﹨-﹫ﹰ-ﹲﹴﹶ-ﻼ!-하-ᅦᅧ-ᅬᅭ-ᅲᅳ-ᅵ¢-₩`;
125+
}
119126

120127
if (typeof PDFJSDev === "undefined" || PDFJSDev.test("TESTING")) {
121128
const ranges = [];
@@ -145,7 +152,24 @@ function getNormalizeWithNFKC() {
145152
}
146153
}
147154
}
148-
if (ranges.join("") !== NormalizeWithNFKC) {
155+
156+
const rangesStr = ranges.join("");
157+
if (!NormalizeWithNFKC) {
158+
NormalizeWithNFKC = rangesStr;
159+
} else if (rangesStr !== NormalizeWithNFKC) {
160+
for (let i = 1; i < rangesStr.length; i++) {
161+
if (rangesStr[i] !== NormalizeWithNFKC[i]) {
162+
console.log(
163+
`Difference at index ${i}: ` +
164+
`U+${rangesStr.charCodeAt(i).toString(16).toUpperCase().padStart(4, "0")}` +
165+
`!== U+${NormalizeWithNFKC.charCodeAt(i)
166+
.toString(16)
167+
.toUpperCase()
168+
.padStart(4, "0")}`
169+
);
170+
break;
171+
}
172+
}
149173
throw new Error(
150174
"getNormalizeWithNFKC - update the `NormalizeWithNFKC` string."
151175
);

0 commit comments

Comments
 (0)