Skip to content

Commit

Permalink
improve method PlainText.
Browse files Browse the repository at this point in the history
  • Loading branch information
Hǎiliàng Wáng committed May 14, 2015
1 parent bb6aee0 commit 1cd0bf4
Show file tree
Hide file tree
Showing 2 changed files with 108 additions and 11 deletions.
11 changes: 0 additions & 11 deletions node.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,17 +81,6 @@ func (n *Node) AllText(pat ...string) *string {
return nil
}

// PlainText concatenates the text of every descendant text node of n, in the
// order Descendants yields them, and returns the result. A nil receiver
// yields the empty string.
func (n *Node) PlainText() string {
	if n == nil {
		return ""
	}
	texts := n.Descendants(TextNode).Strings(GetText)
	var out bytes.Buffer
	for i := range texts {
		out.WriteString(texts[i])
	}
	return out.String()
}

func (n *Node) Render() *string {
if n == nil {
return nil
Expand Down
108 changes: 108 additions & 0 deletions plain.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
// Copyright 2005, Hǎiliàng Wáng. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package query

import (
"bytes"
"strings"

"golang.org/x/net/html"
"h12.me/html-query/expr"
)

// PlainText renders n and everything below it with renderPlain and returns
// the rendered text with leading and trailing white space removed.
//
// It returns nil when n is nil or when renderPlain reports any error.
func (n *Node) PlainText() *string {
	if n == nil {
		return nil
	}
	buf := new(bytes.Buffer)
	if renderPlain(buf, &n.n) != nil {
		return nil
	}
	text := strings.TrimSpace(buf.String())
	return &text
}

// renderPlain writes the plain-text form of n to w and returns the first
// error encountered.
//
//   - Text nodes are written verbatim.
//   - Document nodes are rendered by rendering each child in order.
//   - Element nodes are delegated to renderPlainElementNode.
//   - Every other node type is ignored and produces no output.
func renderPlain(w writer, n *html.Node) error {
	switch n.Type {
	case html.TextNode:
		// Report a failed write instead of dropping it.
		_, err := w.WriteString(n.Data)
		return err
	case html.DocumentNode:
		return renderPlainChild(w, n)
	case html.ElementNode:
		return renderPlainElementNode(w, n)
	}
	return nil
}

// renderPlainElementNode writes the plain-text form of the element n to w.
//
// Handling depends on the element name (n.Data):
//
//   - pre, listing, textarea: if the first child is a text node starting
//     with "\n", one extra "\n" is written before the children.
//   - iframe, noembed, noframes, noscript, plaintext, script, style, xmp:
//     the children are written and the function returns; for plaintext the
//     return value is plaintextAbort.
//   - a: the children are written unchanged when the first child's Data
//     looks like a URL (see isURL) or when there is no non-empty href
//     attribute; otherwise the link is written as "[children](href)".
//   - anything else: the children are written, followed by writeBreak for
//     p and br.
//
// Errors from writing to w are returned to the caller.
func renderPlainElementNode(w writer, n *html.Node) error {
	// children renders n's child nodes and reports genuine failures.
	// plaintextAbort coming back from a descendant is not propagated: this
	// function has always discarded it at this point, and forwarding it would
	// change what callers observe.
	// NOTE(review): plaintextAbort is defined outside this file; it looks like
	// a stop signal rather than a failure — confirm.
	children := func() error {
		if err := renderPlainChild(w, n); err != nil && err != plaintextAbort {
			return err
		}
		return nil
	}

	switch n.Data {
	case "pre", "listing", "textarea":
		if c := n.FirstChild; c != nil && c.Type == html.TextNode && strings.HasPrefix(c.Data, "\n") {
			if err := w.WriteByte('\n'); err != nil {
				return err
			}
		}
	}

	switch n.Data {
	case "iframe", "noembed", "noframes", "noscript", "plaintext", "script", "style", "xmp":
		for c := n.FirstChild; c != nil; c = c.NextSibling {
			if c.Type == html.TextNode {
				if _, err := w.WriteString(c.Data); err != nil {
					return err
				}
				continue
			}
			if err := renderPlain(w, c); err != nil {
				return err
			}
		}
		if n.Data == "plaintext" {
			return plaintextAbort
		}
		return nil
	case "a":
		// href stays empty when the link should be written as bare text:
		// either the link text already looks like a URL, or there is no
		// usable href attribute. An anchor without children (FirstChild is
		// nil) must not be dereferenced.
		href := ""
		if c := n.FirstChild; c == nil || !isURL(c.Data) {
			if u := expr.GetAttr(n, "href"); u != nil {
				href = *u
			}
		}
		if href == "" {
			return children()
		}
		if _, err := w.WriteString("["); err != nil {
			return err
		}
		if err := children(); err != nil {
			return err
		}
		_, err := w.WriteString("](" + href + ")")
		return err
	}

	if err := children(); err != nil {
		return err
	}
	// The break goes after the children have been written.
	switch n.Data {
	case "p", "br":
		writeBreak(w)
	}
	return nil
}

// isURL reports whether s looks like a web or e-mail address, i.e. whether
// it contains "http://", "https://" or "@" anywhere.
//
// Surrounding white space does not affect a substring search, so s is not
// trimmed first.
func isURL(s string) bool {
	return strings.Contains(s, "http://") ||
		strings.Contains(s, "https://") ||
		strings.Contains(s, "@")
}

// renderPlainChild renders each child of n to w with renderPlain, in sibling
// order, and stops at the first error, which it returns. It returns nil when
// every child rendered successfully or n has no children.
func renderPlainChild(w writer, n *html.Node) error {
	child := n.FirstChild
	for child != nil {
		err := renderPlain(w, child)
		if err != nil {
			return err
		}
		child = child.NextSibling
	}
	return nil
}

0 comments on commit 1cd0bf4

Please sign in to comment.