diff --git a/CHANGELOG.md b/CHANGELOG.md index d9385ae..e2d6928 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,15 @@ -## v1.1 +## v1.2.0 + +### Added + +- ErrorDetails to the Root object. This will contain the templated error messages that used to be returned by Error + +### Changed + +- Error will now be one of a standard set of errors defined by the package. Details about the error message have been moved +to the ErrorDetails property of Root. + +## v1.1.0 ### Added diff --git a/examples/errors/errors.go b/examples/errors/errors.go new file mode 100644 index 0000000..c04f02e --- /dev/null +++ b/examples/errors/errors.go @@ -0,0 +1,33 @@ +// Errors happen. This example shows how to detect and handle some of them. + +package main + +import ( + "fmt" + "log" + + "github.com/anaskhan96/soup" +) + +func main() { + _, err := soup.Get("this url isn't real!") + if err != nil && err.(soup.Error).Type == soup.ErrInGetRequest { + // Handle as required! + } + + url := fmt.Sprintf("https://xkcd.com/50") + xkcd, err := soup.Get(url) + if err != nil { + // Handle it + } + xkcdSoup := soup.HTMLParse(xkcd) + links := xkcdSoup.Find("div", "id", "linkz") + if links.Error != nil && links.Error.(soup.Error).Type == soup.ErrElementNotFound { + log.Printf("Element not found: %v", links.Error) + } + // These error types were introduced in version 1.2.0, but just checking for err still works: + links = xkcdSoup.Find("div", "id", "links2") + if links.Error != nil { + log.Printf("Something happened: %s", links.Error) + } +} diff --git a/soup.go b/soup.go index 033d822..7e5ae95 100644 --- a/soup.go +++ b/soup.go @@ -6,7 +6,7 @@ package soup import ( "bytes" - "errors" + "fmt" "io/ioutil" "net/http" "regexp" @@ -15,20 +15,67 @@ import ( "golang.org/x/net/html" ) -// Root is a structure containing a pointer to an html node, the node value, and an error variable to return an error if occurred +// ErrorType defines types of errors that are possible from soup +type ErrorType int + +const 
( + // ErrUnableToParse will be returned when the HTML could not be parsed + ErrUnableToParse ErrorType = iota + // ErrElementNotFound will be returned when the element was not found + ErrElementNotFound + // ErrNoNextSibling will be returned when no next sibling can be found + ErrNoNextSibling + // ErrNoPreviousSibling will be returned when no previous sibling can be found + ErrNoPreviousSibling + // ErrNoNextElementSibling will be returned when no next element sibling can be found + ErrNoNextElementSibling + // ErrNoPreviousElementSibling will be returned when no previous element sibling can be found + ErrNoPreviousElementSibling + // ErrCreatingGetRequest will be returned when the get request couldn't be created + ErrCreatingGetRequest + // ErrInGetRequest will be returned when there was an error during the get request + ErrInGetRequest + // ErrReadingResponse will be returned if there was an error reading the response to our get request + ErrReadingResponse +) + +// Error allows easier introspection on the type of error returned. +// If you know you have an Error, you can compare the Type to one of the exported types +// from this package to see what kind of error it is, then further inspect the Error() method +// to see if it has more specific details for you, like in the case of an ErrElementNotFound +// type of error. +type Error struct { + Type ErrorType + msg string +} + +func (se Error) Error() string { + return se.msg +} + +func newError(t ErrorType, msg string) Error { + return Error{Type: t, msg: msg} +} + +// Root is a structure containing a pointer to an html node, the node value, and an error variable to return an error if one occurred type Root struct { Pointer *html.Node NodeValue string Error error } -var debug = false +// Init a new HTTP client for use when the caller doesn't want to use their own. 
+var ( + defaultClient = &http.Client{} -// Headers contains all HTTP headers to send -var Headers = make(map[string]string) + debug = false -// Cookies contains all HTTP cookies to send -var Cookies = make(map[string]string) + // Headers contains all HTTP headers to send + Headers = make(map[string]string) + + // Cookies contains all HTTP cookies to send + Cookies = make(map[string]string) +) // SetDebug sets the debug status // Setting this to true causes the panics to be thrown and logged onto the console. @@ -42,6 +89,7 @@ func Header(n string, v string) { Headers[n] = v } +// Cookie sets a cookie for http requests func Cookie(n string, v string) { Cookies[n] = v } @@ -53,7 +101,7 @@ func GetWithClient(url string, client *http.Client) (string, error) { if debug { panic("Couldn't perform GET request to " + url) } - return "", errors.New("couldn't perform GET request to " + url) + return "", newError(ErrCreatingGetRequest, "error creating get request to "+url) } // Set headers for hName, hValue := range Headers { @@ -72,7 +120,7 @@ func GetWithClient(url string, client *http.Client) (string, error) { if debug { panic("Couldn't perform GET request to " + url) } - return "", errors.New("couldn't perform GET request to " + url) + return "", newError(ErrInGetRequest, "couldn't perform GET request to "+url) } defer resp.Body.Close() bytes, err := ioutil.ReadAll(resp.Body) @@ -80,16 +128,14 @@ func GetWithClient(url string, client *http.Client) (string, error) { if debug { panic("Unable to read the response body") } - return "", errors.New("unable to read the response body") + return "", newError(ErrReadingResponse, "unable to read the response body") } return string(bytes), nil } // Get returns the HTML returned by the url in string using the default HTTP client func Get(url string) (string, error) { - // Init a new HTTP client - client := &http.Client{} - return GetWithClient(url, client) + return GetWithClient(url, defaultClient) } // HTMLParse parses the HTML 
returning a start pointer to the DOM @@ -99,7 +145,7 @@ func HTMLParse(s string) Root { if debug { panic("Unable to parse the HTML") } - return Root{nil, "", errors.New("unable to parse the HTML")} + return Root{Error: newError(ErrUnableToParse, "unable to parse the HTML")} } for r.Type != html.ElementNode { switch r.Type { @@ -111,7 +157,7 @@ func HTMLParse(s string) Root { r = r.NextSibling } } - return Root{r, r.Data, nil} + return Root{Pointer: r, NodeValue: r.Data} } // Find finds the first occurrence of the given tag name, @@ -123,9 +169,9 @@ func (r Root) Find(args ...string) Root { if debug { panic("Element `" + args[0] + "` with attributes `" + strings.Join(args[1:], " ") + "` not found") } - return Root{nil, "", errors.New("element `" + args[0] + "` with attributes `" + strings.Join(args[1:], " ") + "` not found")} + return Root{Error: newError(ErrElementNotFound, fmt.Sprintf("element `%s` with attributes `%s` not found", args[0], strings.Join(args[1:], " ")))} } - return Root{temp, temp.Data, nil} + return Root{Pointer: temp, NodeValue: temp.Data} } // FindAll finds all occurrences of the given tag name, @@ -142,7 +188,7 @@ func (r Root) FindAll(args ...string) []Root { } pointers := make([]Root, 0, len(temp)) for i := 0; i < len(temp); i++ { - pointers = append(pointers, Root{temp[i], temp[i].Data, nil}) + pointers = append(pointers, Root{Pointer: temp[i], NodeValue: temp[i].Data}) } return pointers } @@ -155,9 +201,9 @@ func (r Root) FindStrict(args ...string) Root { if debug { panic("Element `" + args[0] + "` with attributes `" + strings.Join(args[1:], " ") + "` not found") } - return Root{nil, "", errors.New("element `" + args[0] + "` with attributes `" + strings.Join(args[1:], " ") + "` not found")} + return Root{nil, "", newError(ErrElementNotFound, fmt.Sprintf("element `%s` with attributes `%s` not found", args[0], strings.Join(args[1:], " ")))} } - return Root{temp, temp.Data, nil} + return Root{Pointer: temp, NodeValue: temp.Data} } // 
FindAllStrict finds all occurrences of the given tag name @@ -172,7 +218,7 @@ func (r Root) FindAllStrict(args ...string) []Root { } pointers := make([]Root, 0, len(temp)) for i := 0; i < len(temp); i++ { - pointers = append(pointers, Root{temp[i], temp[i].Data, nil}) + pointers = append(pointers, Root{Pointer: temp[i], NodeValue: temp[i].Data}) } return pointers } @@ -185,9 +231,9 @@ func (r Root) FindNextSibling() Root { if debug { panic("No next sibling found") } - return Root{nil, "", errors.New("no next sibling found")} + return Root{Error: newError(ErrNoNextSibling, "no next sibling found")} } - return Root{nextSibling, nextSibling.Data, nil} + return Root{Pointer: nextSibling, NodeValue: nextSibling.Data} } // FindPrevSibling finds the previous sibling of the pointer in the DOM @@ -198,9 +244,10 @@ func (r Root) FindPrevSibling() Root { if debug { panic("No previous sibling found") } - return Root{nil, "", errors.New("no previous sibling found")} + + return Root{Error: newError(ErrNoPreviousSibling, "no previous sibling found")} } - return Root{prevSibling, prevSibling.Data, nil} + return Root{Pointer: prevSibling, NodeValue: prevSibling.Data} } // FindNextElementSibling finds the next element sibling of the pointer in the DOM @@ -211,12 +258,12 @@ func (r Root) FindNextElementSibling() Root { if debug { panic("No next element sibling found") } - return Root{nil, "", errors.New("no next element sibling found")} + return Root{Error: newError(ErrNoNextElementSibling, "no next element sibling found")} } if nextSibling.Type == html.ElementNode { - return Root{nextSibling, nextSibling.Data, nil} + return Root{Pointer: nextSibling, NodeValue: nextSibling.Data} } - p := Root{nextSibling, nextSibling.Data, nil} + p := Root{Pointer: nextSibling, NodeValue: nextSibling.Data} return p.FindNextElementSibling() } @@ -228,12 +275,12 @@ func (r Root) FindPrevElementSibling() Root { if debug { panic("No previous element sibling found") } - return Root{nil, "", 
errors.New("no previous element sibling found")} + return Root{Error: newError(ErrNoPreviousElementSibling, "no previous element sibling found")} } if prevSibling.Type == html.ElementNode { - return Root{prevSibling, prevSibling.Data, nil} + return Root{Pointer: prevSibling, NodeValue: prevSibling.Data} } - p := Root{prevSibling, prevSibling.Data, nil} + p := Root{Pointer: prevSibling, NodeValue: prevSibling.Data} return p.FindPrevElementSibling() } @@ -242,7 +289,7 @@ func (r Root) Children() []Root { child := r.Pointer.FirstChild var children []Root for child != nil { - children = append(children, Root{child, child.Data, nil}) + children = append(children, Root{Pointer: child, NodeValue: child.Data}) child = child.NextSibling } return children diff --git a/soup_test.go b/soup_test.go index be511ca..0239a8d 100644 --- a/soup_test.go +++ b/soup_test.go @@ -5,6 +5,8 @@ import ( "strconv" "strings" "testing" + + "github.com/stretchr/testify/assert" ) const testHTML = ` @@ -205,10 +207,24 @@ func TestFullText(t *testing.T) { } func TestFullTextEmpty(t *testing.T) { - //