diff --git a/.pnp.cjs b/.pnp.cjs index d503e1e..e7de48e 100755 --- a/.pnp.cjs +++ b/.pnp.cjs @@ -48,10 +48,12 @@ function $$SETUP_STATE(hydrateRuntimeState, basePath) { ["@rollup/plugin-commonjs", "virtual:4e4824ddb2bb82a661ef35e7cd03b85962fa8bd39755461bf5f82f22c0044638d264b1390f9aadc6399be3743d686ffbf74edc5194d4caf1c8f6ba822d1bc5d3#npm:15.0.0"], ["@rollup/plugin-node-resolve", "virtual:4e4824ddb2bb82a661ef35e7cd03b85962fa8bd39755461bf5f82f22c0044638d264b1390f9aadc6399be3743d686ffbf74edc5194d4caf1c8f6ba822d1bc5d3#npm:9.0.0"], ["@rollup/plugin-strip", "virtual:4e4824ddb2bb82a661ef35e7cd03b85962fa8bd39755461bf5f82f22c0044638d264b1390f9aadc6399be3743d686ffbf74edc5194d4caf1c8f6ba822d1bc5d3#npm:2.0.0"], + ["@types/aes-js", "npm:3.1.1"], ["@types/jest", "npm:26.0.10"], ["@types/pako", "npm:1.0.1"], ["@typescript-eslint/eslint-plugin", "virtual:4e4824ddb2bb82a661ef35e7cd03b85962fa8bd39755461bf5f82f22c0044638d264b1390f9aadc6399be3743d686ffbf74edc5194d4caf1c8f6ba822d1bc5d3#npm:3.9.1"], ["@typescript-eslint/parser", "virtual:4e4824ddb2bb82a661ef35e7cd03b85962fa8bd39755461bf5f82f22c0044638d264b1390f9aadc6399be3743d686ffbf74edc5194d4caf1c8f6ba822d1bc5d3#npm:3.9.1"], + ["aes-js", "npm:3.1.2"], ["babel-jest", "virtual:3bf5cb85a2e664643c98d2e719d27af43974598ccaa6ea3c355c3ae5c1d5177e21a427bd1a7e70bdce3b4affdbcea39f2ffaa213984380213caeec2a5a6a00da#npm:26.3.0"], ["cfb", "npm:1.2.0"], ["eslint", "npm:7.7.0"], @@ -5696,6 +5698,15 @@ function $$SETUP_STATE(hydrateRuntimeState, basePath) { "linkType": "HARD", }] ]], + ["@types/aes-js", [ + ["npm:3.1.1", { + "packageLocation": "./.yarn/cache/@types-aes-js-npm-3.1.1-c9780f09d6-388d21adba.zip/node_modules/@types/aes-js/", + "packageDependencies": [ + ["@types/aes-js", "npm:3.1.1"] + ], + "linkType": "HARD", + }] + ]], ["@types/babel__core", [ ["npm:7.1.9", { "packageLocation": "./.yarn/cache/@types-babel__core-npm-7.1.9-23e959fd72-d92c530efc.zip/node_modules/@types/babel__core/", @@ -6907,6 +6918,15 @@ function $$SETUP_STATE(hydrateRuntimeState, basePath) { "linkType": "HARD", }] ]], + ["aes-js", [ + ["npm:3.1.2", { + "packageLocation": "./.yarn/cache/aes-js-npm-3.1.2-d7549a23a2-062154d50b.zip/node_modules/aes-js/", + "packageDependencies": [ + ["aes-js", "npm:3.1.2"] + ], + "linkType": "HARD", + }] + ]], ["agent-base", [ ["npm:6.0.2", { "packageLocation": "./.yarn/cache/agent-base-npm-6.0.2-428f325a93-f52b6872cc.zip/node_modules/agent-base/", @@ -14749,10 +14769,12 @@ function $$SETUP_STATE(hydrateRuntimeState, basePath) { ["@rollup/plugin-commonjs", "virtual:4e4824ddb2bb82a661ef35e7cd03b85962fa8bd39755461bf5f82f22c0044638d264b1390f9aadc6399be3743d686ffbf74edc5194d4caf1c8f6ba822d1bc5d3#npm:15.0.0"], ["@rollup/plugin-node-resolve", "virtual:4e4824ddb2bb82a661ef35e7cd03b85962fa8bd39755461bf5f82f22c0044638d264b1390f9aadc6399be3743d686ffbf74edc5194d4caf1c8f6ba822d1bc5d3#npm:9.0.0"], ["@rollup/plugin-strip", "virtual:4e4824ddb2bb82a661ef35e7cd03b85962fa8bd39755461bf5f82f22c0044638d264b1390f9aadc6399be3743d686ffbf74edc5194d4caf1c8f6ba822d1bc5d3#npm:2.0.0"], + ["@types/aes-js", "npm:3.1.1"], ["@types/jest", "npm:26.0.10"], ["@types/pako", "npm:1.0.1"], ["@typescript-eslint/eslint-plugin", "virtual:4e4824ddb2bb82a661ef35e7cd03b85962fa8bd39755461bf5f82f22c0044638d264b1390f9aadc6399be3743d686ffbf74edc5194d4caf1c8f6ba822d1bc5d3#npm:3.9.1"], ["@typescript-eslint/parser", "virtual:4e4824ddb2bb82a661ef35e7cd03b85962fa8bd39755461bf5f82f22c0044638d264b1390f9aadc6399be3743d686ffbf74edc5194d4caf1c8f6ba822d1bc5d3#npm:3.9.1"], + ["aes-js", "npm:3.1.2"], ["babel-jest", "virtual:3bf5cb85a2e664643c98d2e719d27af43974598ccaa6ea3c355c3ae5c1d5177e21a427bd1a7e70bdce3b4affdbcea39f2ffaa213984380213caeec2a5a6a00da#npm:26.3.0"], ["cfb", "npm:1.2.0"], ["eslint", "npm:7.7.0"], diff --git a/.yarn/cache/@types-aes-js-npm-3.1.1-c9780f09d6-388d21adba.zip b/.yarn/cache/@types-aes-js-npm-3.1.1-c9780f09d6-388d21adba.zip new file mode 100644 index 0000000..29ccd61 Binary files /dev/null and b/.yarn/cache/@types-aes-js-npm-3.1.1-c9780f09d6-388d21adba.zip differ diff --git a/.yarn/cache/aes-js-npm-3.1.2-d7549a23a2-062154d50b.zip b/.yarn/cache/aes-js-npm-3.1.2-d7549a23a2-062154d50b.zip new file mode 100644 index 0000000..3211543 Binary files /dev/null and b/.yarn/cache/aes-js-npm-3.1.2-d7549a23a2-062154d50b.zip differ diff --git a/package.json b/package.json index c612d44..6d9c819 100644 --- a/package.json +++ b/package.json @@ -47,6 +47,7 @@ "@rollup/plugin-commonjs": "^15.0.0", "@rollup/plugin-node-resolve": "^9.0.0", "@rollup/plugin-strip": "^2.0.0", + "@types/aes-js": "^3", "@types/jest": "^26.0.10", "@types/pako": "^1.0.1", "@typescript-eslint/eslint-plugin": "^3.6.1", @@ -63,6 +64,7 @@ "typescript": "^4.0.2" }, "dependencies": { + "aes-js": "^3.1.2", "cfb": "^1.2.0", "pako": "^1.0.11" } diff --git a/src/parser/parse.ts b/src/parser/parse.ts index da4281f..846e66a 100644 --- a/src/parser/parse.ts +++ b/src/parser/parse.ts @@ -30,6 +30,7 @@ import HWPVersion from '../models/version' import Section from '../models/section' import DocInfoParser from './DocInfoParser' import SectionParser from './SectionParser' +import parseViewText from './parseViewText' import ByteReader from '../utils/byteReader' import { getBitValue } from '../utils/bitUtils' @@ -119,17 +120,25 @@ function parseSection(container: CFB$Container, sectionNumber: number): Section return new SectionParser(decodedContent).parse() } +function parseBodyText(container: CFB$Container, docInfo: DocInfo): Section[] { + const sections: Section[] = [] + + for (let i = 0; i < docInfo.sectionSize; i += 1) { + sections.push(parseSection(container, i)) + } + + return sections +} + function parse(input: CFB$Blob, options?: CFB$ParsingOptions): HWPDocument { const container: CFB$Container = read(input, options) const header = parseFileHeader(container) const docInfo = parseDocInfo(container, header) - const sections: Section[] = [] - - for (let i = 0; i < docInfo.sectionSize; i += 1) { - sections.push(parseSection(container, i)) - } + const sections: Section[] = header.properties.distribution + ? parseViewText(container) + : parseBodyText(container, docInfo) return new HWPDocument(header, docInfo, sections) } diff --git a/src/parser/parseViewText.ts b/src/parser/parseViewText.ts new file mode 100644 index 0000000..963c9cf --- /dev/null +++ b/src/parser/parseViewText.ts @@ -0,0 +1,78 @@ +// Reference 1 : https://cdn.hancom.com/link/docs/%ED%95%9C%EA%B8%80%EB%AC%B8%EC%84%9C%ED%8C%8C%EC%9D%BC%ED%98%95%EC%8B%9D_%EB%B0%B0%ED%8F%AC%EC%9A%A9%EB%AC%B8%EC%84%9C_revision1.2.hwp +// Reference 2 : https://groups.google.com/g/hwp-foss/c/d2KL2ypR89Q + +import { + find, + CFB$Container, + CFB$Entry, +} from 'cfb' +import { inflate } from 'pako' +import * as aesjs from 'aes-js' + +import Section from '../models/section' +import SectionParser from './SectionParser' +import ByteReader from '../utils/byteReader' + +function createRand(seed = 1) { + let randomSeed = seed + return () => { + randomSeed = (randomSeed * 214013 + 2531011) & 0xFFFFFFFF + return (randomSeed >> 16) & 0x7FFF + } +} + +function decrypt(cipherText: ArrayBuffer, decKey: ArrayBuffer) { + // eslint-disable-next-line new-cap + const aesEcb = new aesjs.ModeOfOperation.ecb(new Uint8Array(decKey)) + const decryptedBytes = aesEcb.decrypt(new Uint8Array(cipherText)) + return decryptedBytes +} + +function getDecryptionKey(data: ArrayBuffer) : ArrayBuffer { + const sha1Encoded = new Uint8Array(data) + const sha1Decoded = new Uint8Array(sha1Encoded.length) + const seed = (new DataView(data.slice(0, 4))).getInt32(0, true) + const offset = 4 + (seed & 0xF) + const rand = createRand(seed) + for (let j = 0, n = 0, k = 0; j < 256; j += 1, n -= 1) { + if (n === 0) { + k = rand() & 0xFF + n = (rand() & 0xF) + 1 + } + sha1Decoded[j] = sha1Encoded[j] ^ k + } + const sha1ucsstr = sha1Decoded.slice(offset, 80) + return sha1ucsstr.slice(0, 16) +} + +function parseViewTextSection(entry: CFB$Entry): Section { + const content = new Uint8Array(entry.content) + const reader = new ByteReader(content.buffer) + const [, , size] = reader.readRecord() + const distDocData = reader.read(size) + const encryptedData = reader.read(reader.remainByte()) + const decKey = getDecryptionKey(distDocData) + const decrypted = decrypt(encryptedData, decKey) + const decodedContent: Uint8Array = inflate(decrypted, { windowBits: -15 }) + const section: Section = new SectionParser(decodedContent).parse() + return section +} + +function parseViewText(container: CFB$Container): Section[] { + const view = find(container, 'Root Entry/ViewText/') + const viewPaths = container.FullPaths.filter((e: string) => e.startsWith('Root Entry/ViewText/Section')) + const sections: Section[] = [] + + if (view && viewPaths.length > 0) { + for (let i = 0; i < viewPaths.length; i += 1) { + const entry = find(container, `Root Entry/ViewText/Section${i}`) + if (entry != null) { + sections.push(parseViewTextSection(entry)) + } + } + } + + return sections +} + +export default parseViewText diff --git a/yarn.lock b/yarn.lock index 3a818d4..1904788 100644 --- a/yarn.lock +++ b/yarn.lock @@ -3393,6 +3393,13 @@ __metadata: languageName: node linkType: hard +"@types/aes-js@npm:^3": + version: 3.1.1 + resolution: "@types/aes-js@npm:3.1.1" + checksum: 388d21adbaa70548f9e94947430f53b54d13f99408fc6461227c2529b9dbaa5397a135e8bb4584206c65a3ef837b868b34602816dc7c20f1537ea9ef23a74bd9 + languageName: node + linkType: hard + "@types/babel__core@npm:^7.0.0, @types/babel__core@npm:^7.1.7": version: 7.1.9 resolution: "@types/babel__core@npm:7.1.9" @@ -4324,6 +4331,13 @@ __metadata: languageName: node linkType: hard +"aes-js@npm:^3.1.2": + version: 3.1.2 + resolution: "aes-js@npm:3.1.2" + checksum: 062154d50b1e433cc8c3b8ca7879f3a6375d5e79c2a507b2b6c4ec920b4cd851bf2afa7f65c98761a9da89c0ab618cbe6529e8e9a1c71f93290b53128fb8f712 + languageName: node + linkType: hard + "agent-base@npm:6, agent-base@npm:^6.0.2": version: 6.0.2 resolution: "agent-base@npm:6.0.2" @@ -10935,10 +10949,12 @@ fsevents@~2.3.2: "@rollup/plugin-commonjs": ^15.0.0 "@rollup/plugin-node-resolve": ^9.0.0 "@rollup/plugin-strip": ^2.0.0 + "@types/aes-js": ^3 "@types/jest": ^26.0.10 "@types/pako": ^1.0.1 "@typescript-eslint/eslint-plugin": ^3.6.1 "@typescript-eslint/parser": ^3.9.1 + aes-js: ^3.1.2 babel-jest: ^26.3.0 cfb: ^1.2.0 eslint: ^7.7.0