Skip to content

Commit 25ed11b

Browse files
committed
(feat) parse docx or pdf
Signed-off-by: Dan Selman <[email protected]>
1 parent 9356294 commit 25ed11b

File tree

6 files changed

+28
-7
lines changed

6 files changed

+28
-7
lines changed

packages/markdown-cli/lib/Commands.js

Lines changed: 22 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@ const CommonMarkTransformer = require('@accordproject/markdown-common').CommonMa
2121
const CiceroMarkTransformer = require('@accordproject/markdown-cicero').CiceroMarkTransformer;
2222
const SlateTransformer = require('@accordproject/markdown-slate').SlateTransformer;
2323
const HtmlTransformer = require('@accordproject/markdown-html').HtmlTransformer;
24+
const PdfTransformer = require('@accordproject/markdown-pdf').PdfTransformer;
25+
const DocxTransformer = require('@accordproject/markdown-docx').DocxTransformer;
2426

2527
/**
2628
* Utility class that implements the commands exposed by the CLI.
@@ -62,7 +64,7 @@ class Commands {
6264
argv = Commands.setDefaultFileArg(argv, 'sample', 'sample.md', ((argv, argDefaultName) => { return argDefaultName; }));
6365

6466
if(argv.verbose) {
65-
Logger.info(`parse sample ${argv.sample} printing intermediate transformations.`);
67+
Logger.info(`parse sample ${argv.sample} (or docx, pdf) printing intermediate transformations.`);
6668
}
6769

6870
return argv;
@@ -80,7 +82,7 @@ class Commands {
8082
* @param {boolean} [options.verbose] verbose output
8183
* @returns {object} Promise to the result of parsing
8284
*/
83-
static parse(samplePath, outputPath, options) {
85+
static async parse(samplePath, outputPath, options) {
8486
const { cicero, slate, html, verbose } = options;
8587
const commonOptions = {};
8688
commonOptions.tagInfo = true;
@@ -89,9 +91,24 @@ class Commands {
8991
const ciceroMark = new CiceroMarkTransformer();
9092
const slateMark = new SlateTransformer();
9193
const htmlMark = new HtmlTransformer();
94+
const docx = new DocxTransformer();
95+
const pdf = new PdfTransformer();
96+
97+
let result = null;
98+
99+
if(samplePath.endsWith('.pdf')) {
100+
const pdfBuffer = Fs.readFileSync(samplePath);
101+
result = await pdf.toCiceroMark(pdfBuffer, 'json');
102+
}
103+
else if(samplePath.endsWith('.docx')) {
104+
const docxBuffer = Fs.readFileSync(samplePath);
105+
result = await docx.toCiceroMark(docxBuffer, 'json');
106+
}
107+
else {
108+
const markdownText = Fs.readFileSync(samplePath, 'utf8');
109+
result = commonMark.fromMarkdown(markdownText, 'json');
110+
}
92111

93-
const markdownText = Fs.readFileSync(samplePath, 'utf8');
94-
let result = commonMark.fromMarkdown(markdownText, 'json');
95112
if(verbose) {
96113
Logger.info('=== CommonMark ===');
97114
Logger.info(JSON.stringify(result, null, 4));
@@ -150,7 +167,7 @@ class Commands {
150167
}
151168

152169
/**
153-
* Parse a sample markdown
170+
* Parse a sample markdown/pdf/docx
154171
*
155172
* @param {string} dataPath to the sample file
156173
* @param {string} outputPath to an output file

packages/markdown-cli/package.json

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,8 @@
5757
"@accordproject/markdown-cicero": "0.9.4",
5858
"@accordproject/markdown-slate": "0.9.4",
5959
"@accordproject/markdown-html": "0.9.4",
60+
"@accordproject/markdown-pdf": "0.9.4",
61+
"@accordproject/markdown-docx": "0.9.4",
6062
"immutable": "^4.0.0-rc.12",
6163
"jsome": "2.5.0",
6264
"slate": "^0.47.8",

packages/markdown-docx/src/DocxTransformer.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ class DocxTransformer {
3434

3535
/**
3636
* Converts a docx buffer to a CiceroMark DOM
37-
* @param {string} input - html string
37+
* @param {Buffer} input - docx buffer
3838
* @param {string} [format] result format, defaults to 'concerto'. Pass
3939
* 'json' to return the JSON data.
4040
* @returns {promise} promise to the CiceroMark DOM

packages/markdown-docx/src/DocxTransformer.test.js

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ describe.only('import docx', () => {
4848
getDocxFiles().forEach(([file, docx], i) => {
4949
it(`converts ${file} to ciceromark`, async () => {
5050
const json = await docxTransformer.toCiceroMark(docx, 'json');
51+
console.log(JSON.stringify(json, null, 4));
5152
expect(json).toMatchSnapshot(); // (1)
5253
});
5354
});

packages/markdown-pdf/src/PdfTransformer.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ class PdfTransformer {
3434

3535
/**
3636
* Converts a pdf buffer to a CiceroMark DOM
37-
* @param {string} input - html string
37+
* @param {Buffer} input - pdf buffer
3838
* @param {string} [format] result format, defaults to 'concerto'. Pass
3939
* 'json' to return the JSON data.
4040
* @returns {promise} a Promise to the CiceroMark DOM

packages/markdown-pdf/src/PdfTransformer.test.js

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ describe.only('pdf import', () => {
4848
getPdfFiles().forEach(([file, pdfContent], i) => {
4949
it(`converts ${file} to cicero mark`, async () => {
5050
const ciceroMarkDom = await pdfTransformer.toCiceroMark(pdfContent, 'json');
51+
console.log(JSON.stringify(ciceroMarkDom, null, 4));
5152
expect(ciceroMarkDom).toMatchSnapshot(); // (1)
5253
});
5354
});

0 commit comments

Comments
 (0)