Skip to content

Commit e1c2387

Browse files
cli: add hash to path on conflicting filename (#152)
* cli: add testcase for output with extension * cli: add testcase for relative paths * cli: add hash to filepath on duplicate * cli: better error message when input points to folder * cli: try to use filepath.ToSlash * cli: print filepath * cli: try another print filepath * cli: try another print filepath for glob * cli: try splitpattern with toslash * try: remove fmt calls * cli: try to remove ToSlash in hashing * cli: re-introduce ToSlash in hashing
1 parent 20209ee commit e1c2387

File tree

7 files changed

+288
-19
lines changed

7 files changed

+288
-19
lines changed

cli/html2markdown/cmd/exec.go

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,11 @@ func (cli *CLI) run() ([]error, error) {
163163
return nil, err
164164
}
165165

166+
err = calculateOutputPaths(cli.config.inputFilepath, inputs)
167+
if err != nil {
168+
return nil, err
169+
}
170+
166171
for _, input := range inputs {
167172
data, err := cli.readInput(input)
168173
if err != nil {
@@ -174,7 +179,7 @@ func (cli *CLI) run() ([]error, error) {
174179
return nil, err
175180
}
176181

177-
err = cli.writeOutput(outputType, getOutputFileName(input.fullFilepath), markdown)
182+
err = cli.writeOutput(outputType, input.outputFullFilepath, markdown)
178183
if err != nil {
179184
return nil, err
180185
}

cli/html2markdown/cmd/exec_test.go

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ type CLIGoldenInput struct {
5252
inputArgs []string
5353
}
5454

55-
func cliGoldenTester(t *testing.T, input CLIGoldenInput) {
55+
func cliGoldenTester(t *testing.T, folderpath string, input CLIGoldenInput) {
5656
if input.modeStdin == modeTerminal && input.inputStdin != nil {
5757
t.Fatal("invalid test: cannot provide stdin without pipe mode")
5858
}
@@ -77,7 +77,7 @@ func cliGoldenTester(t *testing.T, input CLIGoldenInput) {
7777
t.Fatal("neither stdout nor stderr have any content")
7878
}
7979

80-
g := goldie.New(t)
80+
g := goldie.New(t, goldie.WithFixtureDir(filepath.Join(folderpath, "testdata")))
8181
g.Assert(t, filepath.Join(t.Name(), "stdout"), stdout.Bytes())
8282
g.Assert(t, filepath.Join(t.Name(), "stderr"), stderr.Bytes())
8383
}
@@ -119,6 +119,14 @@ func TestExecute(t *testing.T) {
119119
directoryPath := newTestDirWithFiles(t)
120120
defer os.RemoveAll(directoryPath)
121121

122+
originalDir, err := os.Getwd()
123+
if err != nil {
124+
t.Fatal(err)
125+
}
126+
// By switching the directory we can work with relative paths.
127+
// This makes it easier to test the output of error messages...
128+
chdirWithCleanup(t, directoryPath)
129+
122130
testCases := []struct {
123131
desc string
124132
input CLIGoldenInput
@@ -512,6 +520,19 @@ func TestExecute(t *testing.T) {
512520
inputArgs: []string{"html2markdown", "--input", "not_found.html"},
513521
},
514522
},
523+
{
524+
desc: "[files] input directory",
525+
526+
input: CLIGoldenInput{
527+
modeStdin: modeTerminal,
528+
modeStdout: modePipe,
529+
modeStderr: modePipe,
530+
531+
inputStdin: nil,
532+
inputArgs: []string{"html2markdown", "--input", "input"}, // <-- "input" is a folder
533+
},
534+
},
535+
515536
{
516537
desc: "[files] multiple values",
517538

@@ -539,7 +560,7 @@ func TestExecute(t *testing.T) {
539560
}
540561
for _, tC := range testCases {
541562
t.Run(tC.desc, func(t *testing.T) {
542-
cliGoldenTester(t, tC.input)
563+
cliGoldenTester(t, originalDir, tC.input)
543564
})
544565
}
545566
}

cli/html2markdown/cmd/files_test.go

Lines changed: 180 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,27 @@ func newTestDirWithFiles(t *testing.T) string {
6767
return directoryPath
6868
}
6969

70+
// chdirWithCleanup changes the current working directory to the named directory,
71+
// and then restore the original working directory at the end of the test.
72+
//
73+
// TODO: Once we are on 1.24 we can replace this with t.Chdir()
74+
func chdirWithCleanup(t *testing.T, dir string) {
75+
olddir, err := os.Getwd()
76+
if err != nil {
77+
t.Fatalf("chdir: %v", err)
78+
}
79+
if err := os.Chdir(dir); err != nil {
80+
t.Fatalf("chdir %s: %v", dir, err)
81+
}
82+
83+
t.Cleanup(func() {
84+
if err := os.Chdir(olddir); err != nil {
85+
t.Errorf("chdir to original working directory %s: %v", olddir, err)
86+
os.Exit(1)
87+
}
88+
})
89+
}
90+
7091
func writePipeChar(buf *bytes.Buffer, index int) {
7192
if index == 0 {
7293
return
@@ -306,10 +327,80 @@ func TestExecute_NotOverwrite(t *testing.T) {
306327
expectRepresentation(t, directoryPath, `
307328
.
308329
├─output.md "**file content A**"
309-
`)
330+
`)
310331
})
311332
}
312333

334+
func TestExecute_DuplicateFiles(t *testing.T) {
335+
directoryPath := newTestDir(t)
336+
defer os.RemoveAll(directoryPath)
337+
338+
chdirWithCleanup(t, directoryPath)
339+
340+
inputFolderA := filepath.Join(directoryPath, "input", "a")
341+
inputFolderB := filepath.Join(directoryPath, "input", "b")
342+
inputFolderC := filepath.Join(directoryPath, "input", "nested", "c")
343+
344+
err := os.MkdirAll(inputFolderA, os.ModePerm)
345+
if err != nil {
346+
t.Fatal(err)
347+
}
348+
err = os.MkdirAll(inputFolderB, os.ModePerm)
349+
if err != nil {
350+
t.Fatal(err)
351+
}
352+
err = os.MkdirAll(inputFolderC, os.ModePerm)
353+
if err != nil {
354+
t.Fatal(err)
355+
}
356+
357+
err = os.WriteFile(filepath.Join(inputFolderA, "random.html"), []byte("file a"), 0644)
358+
if err != nil {
359+
t.Fatal(err)
360+
}
361+
err = os.WriteFile(filepath.Join(inputFolderB, "random.html"), []byte("file b"), 0644)
362+
if err != nil {
363+
t.Fatal(err)
364+
}
365+
err = os.WriteFile(filepath.Join(inputFolderC, "random.html"), []byte("file c"), 0644)
366+
if err != nil {
367+
t.Fatal(err)
368+
}
369+
370+
// - - - - - - - - - //
371+
args := []string{"html2markdown", "--input", filepath.Join(".", "input", "**", "*"), "--output", filepath.Join(".", "output") + "/"}
372+
373+
stdin := &FakeFile{mode: modeTerminal}
374+
stdout := &FakeFile{mode: modePipe}
375+
stderr := &FakeFile{mode: modePipe}
376+
377+
Run(stdin, stdout, stderr, args, testRelease)
378+
379+
if len(stderr.Bytes()) != 0 {
380+
t.Fatalf("expected no stderr content but got %q", stderr.String())
381+
}
382+
if len(stdout.Bytes()) != 0 {
383+
t.Fatalf("expected no stdout content")
384+
}
385+
// - - - - - - - - - //
386+
387+
expectRepresentation(t, directoryPath, `
388+
.
389+
├─input
390+
│ ├─a
391+
│ │ ├─random.html "file a"
392+
│ ├─b
393+
│ │ ├─random.html "file b"
394+
│ ├─nested
395+
│ │ ├─c
396+
│ │ │ ├─random.html "file c"
397+
├─output
398+
│ ├─random.689330a60f.md "file b"
399+
│ ├─random.f679b6e0c2.md "file c"
400+
│ ├─random.md "file a"
401+
`)
402+
}
403+
313404
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - //
314405

315406
func TestExecute_FilePattern(t *testing.T) {
@@ -355,7 +446,27 @@ func TestExecute_FilePattern(t *testing.T) {
355446
├─output
356447
│ ├─websites
357448
│ │ ├─the_cool_website.md "**file content A**"
449+
`,
450+
},
451+
{
452+
desc: "output to a specific extension",
453+
assembleArgs: func(dir string) []string {
454+
input := filepath.Join(dir, "input", "website_a.html")
455+
output := filepath.Join(dir, "output", "websites", "the_cool_website.txt")
358456

457+
return []string{"html2markdown", "--input", input, "--output", output}
458+
},
459+
expected: `
460+
.
461+
├─input
462+
│ ├─nested
463+
│ │ ├─website_c.html "<i>file content C</i>"
464+
│ ├─random.txt "other random file"
465+
│ ├─website_a.html "<strong>file content A</strong>"
466+
│ ├─website_b.html "<strong>file content B</strong>"
467+
├─output
468+
│ ├─websites
469+
│ │ ├─the_cool_website.txt "**file content A**"
359470
`,
360471
},
361472
{
@@ -379,6 +490,72 @@ func TestExecute_FilePattern(t *testing.T) {
379490
380491
`,
381492
},
493+
494+
// - - - - - - - - - - - - relative path - - - - - - - - - - - - //
495+
{
496+
desc: "relative path: output to a specific file",
497+
assembleArgs: func(_ string) []string {
498+
input := filepath.Join(".", "input", "website_a.html")
499+
output := filepath.Join(".", "output", "websites", "the_cool_website.md")
500+
501+
return []string{"html2markdown", "--input", input, "--output", output}
502+
},
503+
expected: `
504+
.
505+
├─input
506+
│ ├─nested
507+
│ │ ├─website_c.html "<i>file content C</i>"
508+
│ ├─random.txt "other random file"
509+
│ ├─website_a.html "<strong>file content A</strong>"
510+
│ ├─website_b.html "<strong>file content B</strong>"
511+
├─output
512+
│ ├─websites
513+
│ │ ├─the_cool_website.md "**file content A**"
514+
`,
515+
},
516+
{
517+
desc: "relative path: direct website html files",
518+
assembleArgs: func(_ string) []string {
519+
input := filepath.Join(".", "input", "website*.html")
520+
output := filepath.Join(".", "output") + string(os.PathSeparator)
521+
522+
return []string{"html2markdown", "--input", input, "--output", output}
523+
},
524+
expected: `
525+
.
526+
├─input
527+
│ ├─nested
528+
│ │ ├─website_c.html "<i>file content C</i>"
529+
│ ├─random.txt "other random file"
530+
│ ├─website_a.html "<strong>file content A</strong>"
531+
│ ├─website_b.html "<strong>file content B</strong>"
532+
├─output
533+
│ ├─website_a.md "**file content A**"
534+
│ ├─website_b.md "**file content B**"
535+
`,
536+
},
537+
{
538+
desc: "relative path: output to current directory",
539+
assembleArgs: func(_ string) []string {
540+
input := filepath.Join(".", "input", "website*.html")
541+
output := "." + string(os.PathSeparator)
542+
543+
return []string{"html2markdown", "--input", input, "--output", output}
544+
},
545+
expected: `
546+
.
547+
├─input
548+
│ ├─nested
549+
│ │ ├─website_c.html "<i>file content C</i>"
550+
│ ├─random.txt "other random file"
551+
│ ├─website_a.html "<strong>file content A</strong>"
552+
│ ├─website_b.html "<strong>file content B</strong>"
553+
├─output
554+
├─website_a.md "**file content A**"
555+
├─website_b.md "**file content B**"
556+
`,
557+
},
558+
382559
// - - - - - - - - - - - - pattern - - - - - - - - - - - - //
383560
{
384561
desc: "pattern matches single file",
@@ -522,6 +699,8 @@ func TestExecute_FilePattern(t *testing.T) {
522699
directoryPath := newTestDirWithFiles(t)
523700
defer os.RemoveAll(directoryPath)
524701

702+
chdirWithCleanup(t, directoryPath)
703+
525704
args := tC.assembleArgs(directoryPath)
526705
t.Logf("args: %+v\n", args)
527706

cli/html2markdown/cmd/io_input.go

Lines changed: 20 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -11,21 +11,18 @@ import (
1111
)
1212

1313
type input struct {
14-
fullFilepath string
15-
data []byte
14+
inputFullFilepath string
15+
outputFullFilepath string
16+
data []byte
1617
}
1718

1819
// E.g. "website.html" -> "website"
1920
func fileNameWithoutExtension(fileName string) string {
2021
return strings.TrimSuffix(fileName, filepath.Ext(fileName))
2122
}
2223

23-
func getOutputFileName(fullFilepath string) string {
24-
basenameWithExt := filepath.Base(fullFilepath)
25-
basename := fileNameWithoutExtension(basenameWithExt)
26-
27-
return basename + ".md"
28-
}
24+
// If the output is a file, it would be "output.md"
25+
var defaultBasename = "output"
2926

3027
func (cli *CLI) listInputs() ([]*input, error) {
3128
if cli.isStdinPipe && cli.config.inputFilepath != "" {
@@ -41,9 +38,8 @@ func (cli *CLI) listInputs() ([]*input, error) {
4138
}
4239
return []*input{
4340
{
44-
// If the output is a file, it would be "output.md"
45-
fullFilepath: "output",
46-
data: data,
41+
inputFullFilepath: defaultBasename,
42+
data: data,
4743
},
4844
}, nil
4945
}
@@ -54,6 +50,16 @@ func (cli *CLI) listInputs() ([]*input, error) {
5450
return nil, err
5551
}
5652
if len(matches) == 0 {
53+
// The inputFilepath wasn't actually a glob but was pointing to an existing folder.
54+
// The user probably wanted to convert all files in that folder — so we recommend the glob.
55+
if outInfo, err := os.Stat(cli.config.inputFilepath); err == nil && outInfo.IsDir() {
56+
return nil, NewCLIError(
57+
fmt.Errorf("input path %q is a directory, not a file", cli.config.inputFilepath),
58+
Paragraph("Here is how you can use a glob to match multiple files:"),
59+
CodeBlock(`html2markdown --input "src/*.html" --output "dist/"`),
60+
)
61+
}
62+
5763
return nil, NewCLIError(
5864
fmt.Errorf("no files found matching pattern %q", cli.config.inputFilepath),
5965
Paragraph("Here is how you can use a glob to match multiple files:"),
@@ -64,8 +70,8 @@ func (cli *CLI) listInputs() ([]*input, error) {
6470
var inputs []*input
6571
for _, match := range matches {
6672
inputs = append(inputs, &input{
67-
fullFilepath: match,
68-
data: nil,
73+
inputFullFilepath: match,
74+
data: nil,
6975
})
7076
}
7177

@@ -84,7 +90,7 @@ func (cli *CLI) readInput(in *input) ([]byte, error) {
8490
return in.data, nil
8591
}
8692

87-
data, err := os.ReadFile(in.fullFilepath)
93+
data, err := os.ReadFile(in.inputFullFilepath)
8894
if err != nil {
8995
return nil, err
9096
}

0 commit comments

Comments
 (0)