|
12 | 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | 13 | See the License for the specific language governing permissions and |
14 | 14 | limitations under the License. |
15 | | - */ |
| 15 | +*/ |
16 | 16 |
|
17 | 17 | // Package htmlutil implements a wrapper for Golang's html5 tokeniser / parser implementation, making it much easier to |
18 | 18 | // find and extract information, aiming to be powerful and intuitive while remaining a minimal and logical extension. |
@@ -153,7 +153,14 @@ func (n Node) OuterHTML() string { |
153 | 153 |
|
154 | 154 | // OuterText builds a string from the data of all text nodes in the sub-tree, starting from and including `n` |
155 | 155 | func (n Node) OuterText() string { |
156 | | - return encodeText(n.Data) |
| 156 | + return string(encodeText(n.Data)) |
| 157 | +} |
| 158 | + |
| 159 | +// OuterWords builds a space-separated string from the whitespace-separated data of all text nodes in the sub-tree,
| 160 | +// starting from and including `n`. Note that text split across multiple elements will be considered as multiple
| 161 | +// words (words within non-empty sibling elements will be separated by a single space).
| 162 | +func (n Node) OuterWords() string { |
| 163 | + return string(encodeWords(n.Data)) |
157 | 164 | } |
158 | 165 |
|
159 | 166 | // InnerHTML builds a string using the outer html of all children matching all filters (see the `FindNode` method) |
@@ -182,6 +189,25 @@ func (n Node) InnerText(filters ...func(node Node) bool) string { |
182 | 189 | return string(b) |
183 | 190 | } |
184 | 191 |
|
| 192 | +// InnerWords builds a space-separated string using the outer words of all children matching all filters (see the
| 193 | +// `FindNode` and `OuterWords` methods)
| 194 | +func (n Node) InnerWords(filters ...func(node Node) bool) string { |
| 195 | + var b []byte |
| 196 | + n.Range( |
| 197 | + func(i int, node Node) bool { |
| 198 | + if s := node.OuterWords(); s != `` { |
| 199 | + if len(b) != 0 { |
| 200 | + b = append(b, ' ') |
| 201 | + } |
| 202 | + b = append(b, []byte(s)...) |
| 203 | + } |
| 204 | + return true |
| 205 | + }, |
| 206 | + filters..., |
| 207 | + ) |
| 208 | + return string(b) |
| 209 | +} |
| 210 | + |
185 | 211 | // SiblingIndex returns the total number of previous siblings matching any filters (see the `FindNode` method) |
186 | 212 | func (n Node) SiblingIndex(filters ...func(node Node) bool) int { |
187 | 213 | return siblingIndex(n, filters...) |
|
0 commit comments