Skip to content

Commit b1a76ee

Browse files
docs: add documentation for domain and css selector
1 parent 4d6b2ff commit b1a76ee

File tree

4 files changed

+82
-10
lines changed

4 files changed

+82
-10
lines changed

README.md

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,34 @@ func main() {
8484

8585
- 🧑‍💻 [Example code, basics](/examples/basics/main.go)
8686

87+
Use `WithDomain` to convert _relative_ links to _absolute_ links:
88+
89+
```go
90+
package main
91+
92+
import (
93+
"fmt"
94+
"log"
95+
96+
htmltomarkdown "github.com/JohannesKaufmann/html-to-markdown/v2"
97+
"github.com/JohannesKaufmann/html-to-markdown/v2/converter"
98+
)
99+
100+
func main() {
101+
input := `<img src="/assets/image.png" />`
102+
103+
markdown, err := htmltomarkdown.ConvertString(
104+
input,
105+
converter.WithDomain("https://example.com"),
106+
)
107+
if err != nil {
108+
log.Fatal(err)
109+
}
110+
fmt.Println(markdown)
111+
// Output: ![](https://example.com/assets/image.png)
112+
}
113+
```
114+
87115
The function `htmltomarkdown.ConvertString()` is a _small wrapper_ around `converter.NewConverter()` and the _base_ and _commonmark_ plugins. If you want more control, use the following:
88116

89117
```go
@@ -217,6 +245,12 @@ This domain is for use in illustrative examples in documents. You may use this d
217245
[More information...](https://www.iana.org/domains/example)
218246
```
219247

248+
Use `--help` to learn about the available configuration options, for example:
249+
250+
- `--domain="https://example.com"` to convert _relative_ links to _absolute_ links.
251+
- `--exclude-selector=".ad"` to exclude the HTML elements with `class="ad"` from the conversion.
252+
- `--include-selector="article"` to only include the `<article>` HTML elements in the conversion.
253+
220254
_(The CLI does not support every option yet. More customization will be added over time.)_
221255

222256
---

convert.go

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
package htmltomarkdown
22

33
import (
4+
"io"
5+
46
"github.com/JohannesKaufmann/html-to-markdown/v2/converter"
57
"github.com/JohannesKaufmann/html-to-markdown/v2/plugin/base"
68
"github.com/JohannesKaufmann/html-to-markdown/v2/plugin/commonmark"
@@ -10,29 +12,43 @@ import (
1012
// ConvertString converts a html-string to a markdown-string.
1113
//
1214
// Under the hood `html.Parse()` is used to parse the HTML.
13-
func ConvertString(htmlInput string) (string, error) {
15+
func ConvertString(htmlInput string, opts ...converter.ConvertOptionFunc) (string, error) {
16+
conv := converter.NewConverter(
17+
converter.WithPlugins(
18+
base.NewBasePlugin(),
19+
commonmark.NewCommonmarkPlugin(),
20+
),
21+
)
22+
23+
return conv.ConvertString(htmlInput, opts...)
24+
}
25+
26+
// ConvertReader converts the html from the reader to markdown.
27+
//
28+
// Under the hood `html.Parse()` is used to parse the HTML.
29+
func ConvertReader(r io.Reader, opts ...converter.ConvertOptionFunc) ([]byte, error) {
1430
conv := converter.NewConverter(
1531
converter.WithPlugins(
1632
base.NewBasePlugin(),
1733
commonmark.NewCommonmarkPlugin(),
1834
),
1935
)
2036

21-
return conv.ConvertString(htmlInput)
37+
return conv.ConvertReader(r, opts...)
2238
}
2339

2440
// ConvertNode converts a `*html.Node` to a markdown byte slice.
2541
//
2642
// If you have already parsed an HTML page using the `html.Parse()` function
2743
// from the "golang.org/x/net/html" package then you can pass this node
2844
// directly to the converter.
29-
func ConvertNode(doc *html.Node) ([]byte, error) {
45+
func ConvertNode(doc *html.Node, opts ...converter.ConvertOptionFunc) ([]byte, error) {
3046
conv := converter.NewConverter(
3147
converter.WithPlugins(
3248
base.NewBasePlugin(),
3349
commonmark.NewCommonmarkPlugin(),
3450
),
3551
)
3652

37-
return conv.ConvertNode(doc)
53+
return conv.ConvertNode(doc, opts...)
3854
}

convert_test.go

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,22 @@ func ExampleConvertString() {
2424
fmt.Println(markdown)
2525
// Output: **Bold Text**
2626
}
27+
28+
func ExampleWithDomain() {
29+
input := `<img src="/assets/image.png" />`
30+
31+
markdown, err := htmltomarkdown.ConvertString(
32+
input,
33+
// Provide a different domain for every convert call:
34+
converter.WithDomain("https://example.com"),
35+
)
36+
if err != nil {
37+
log.Fatal(err)
38+
}
39+
fmt.Println(markdown)
40+
// Output: ![](https://example.com/assets/image.png)
41+
}
42+
2743
func ExampleConvertNode() {
2844
input := `<strong>Bold Text</strong>`
2945

converter/convert.go

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,14 +15,20 @@ type convertOption struct {
1515
domain string
1616
context context.Context
1717
}
18-
type convertOptionFunc func(o *convertOption)
18+
type ConvertOptionFunc func(o *convertOption)
1919

20-
func WithContext(ctx context.Context) convertOptionFunc {
20+
func WithContext(ctx context.Context) ConvertOptionFunc {
2121
return func(o *convertOption) {
2222
o.context = ctx
2323
}
2424
}
25-
func WithDomain(domain string) convertOptionFunc {
25+
26+
// WithDomain provides a base `domain` to the converter and
27+
// to the `AssembleAbsoluteURL` function.
28+
//
29+
// If a *relative* url is encountered (in an image or link) then the `domain` is used
30+
// to convert it to an *absolute* url.
31+
func WithDomain(domain string) ConvertOptionFunc {
2632
return func(o *convertOption) {
2733
o.domain = domain
2834
}
@@ -49,7 +55,7 @@ var errBasePluginMissing = errors.New(`you registered the "commonmark" plugin bu
4955
// If you have already parsed an HTML page using the `html.Parse()` function
5056
// from the "golang.org/x/net/html" package then you can pass this node
5157
// directly to the converter.
52-
func (conv *Converter) ConvertNode(doc *html.Node, opts ...convertOptionFunc) ([]byte, error) {
58+
func (conv *Converter) ConvertNode(doc *html.Node, opts ...ConvertOptionFunc) ([]byte, error) {
5359

5460
if err := conv.getError(); err != nil {
5561
// There can be errors while calling `Init` on the plugins (e.g. validation errors).
@@ -113,7 +119,7 @@ func (conv *Converter) ConvertNode(doc *html.Node, opts ...convertOptionFunc) ([
113119
// ConvertReader converts the html from the reader to markdown.
114120
//
115121
// Under the hood `html.Parse()` is used to parse the HTML.
116-
func (conv *Converter) ConvertReader(r io.Reader, opts ...convertOptionFunc) ([]byte, error) {
122+
func (conv *Converter) ConvertReader(r io.Reader, opts ...ConvertOptionFunc) ([]byte, error) {
117123
doc, err := html.Parse(r)
118124
if err != nil {
119125
return nil, err
@@ -125,7 +131,7 @@ func (conv *Converter) ConvertReader(r io.Reader, opts ...convertOptionFunc) ([]
125131
// ConvertString converts a html-string to a markdown-string.
126132
//
127133
// Under the hood `html.Parse()` is used to parse the HTML.
128-
func (conv *Converter) ConvertString(htmlInput string, opts ...convertOptionFunc) (string, error) {
134+
func (conv *Converter) ConvertString(htmlInput string, opts ...ConvertOptionFunc) (string, error) {
129135
r := strings.NewReader(htmlInput)
130136
output, err := conv.ConvertReader(r, opts...)
131137
if err != nil {

0 commit comments

Comments
 (0)