From 94afb75403a5d7ed3cf9b4e44c07ef19330ec53e Mon Sep 17 00:00:00 2001 From: delirehberi Date: Thu, 19 Jul 2018 01:48:11 +0300 Subject: [PATCH] initialize --- license.txt | 21 ++++++++++ main.go | 115 ++++++++++++++++++++++++++++++++++++++++++++++++++++ readme.md | 8 ++++ 3 files changed, 144 insertions(+) create mode 100644 license.txt create mode 100644 main.go create mode 100644 readme.md diff --git a/license.txt b/license.txt new file mode 100644 index 0000000..63b4b68 --- /dev/null +++ b/license.txt @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) [year] [fullname] + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
package main

// Sitemap Checker.
//
// The program downloads a sitemap, requests every URL listed in it and writes
// a new sitemap file that contains only the URLs that answered 200 OK.
//
// Usage:
//
//	./checker http://sitename.com/sitemap.xml output.xml

import (
	"bytes"
	"encoding/xml"
	"fmt"
	"log"
	"net/http"
	"os"
	"sync"
	"time"
)

const (
	// maxConcurrentChecks bounds how many URLs are requested at the same time,
	// so a large sitemap does not open one connection per entry at once.
	maxConcurrentChecks = 16

	// requestTimeout is the upper bound for a single HTTP request.
	requestTimeout = 30 * time.Second
)

var (
	// httpClient is shared by every request so that all of them get a timeout.
	httpClient = &http.Client{Timeout: requestTimeout}

	// validURLsMu serialises the appends checkURL performs on the shared slice.
	validURLsMu sync.Mutex
)

// URLSet is the root <urlset> element of a sitemap.
type URLSet struct {
	XMLName xml.Name `xml:"urlset"`
	XMLNs   string   `xml:"xmlns,attr"`
	URL     []URL    `xml:"url"`
}

// URL is a single <url> entry of a sitemap; only its location is kept.
type URL struct {
	Loc string `xml:"loc"`
}

// main reads the sitemap URL and the output file name from the command line,
// checks every URL of the sitemap and stores the valid ones in the output file.
// Any failure is reported and ends the process with exit status 1.
func main() {
	if len(os.Args) < 3 {
		help()
	}
	sitemapURL := os.Args[1]
	outputFileName := os.Args[2]

	resp, err := httpClient.Get(sitemapURL)
	if err != nil {
		log.Printf("Urls cannot fetched: %s\n", sitemapURL)
		log.Println(err)
		os.Exit(1)
	}
	rawXMLData := readXMLFromResponse(resp)
	resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		// An error page is not a sitemap; do not try to parse it.
		log.Printf("Urls cannot fetched: %s\n", sitemapURL)
		log.Printf("Unexpected response status: %s", resp.Status)
		os.Exit(1)
	}

	urlSet := URLSet{}
	if err := xml.Unmarshal(rawXMLData, &urlSet); err != nil {
		log.Printf("Sitemap cannot parsed. Because: %s", err)
		os.Exit(1)
	}

	newRawXML, err := xml.Marshal(filterValidURLs(urlSet))
	if err != nil {
		fmt.Println(err)
		os.Exit(1)
	}

	if err := saveValidSiteMap(outputFileName, newRawXML); err != nil {
		fmt.Println("I can't write valid sitemap. Error: ", err)
		os.Exit(1)
	}
	fmt.Println("File writed to ", outputFileName, "and closed")
}

// filterValidURLs requests every URL of urlSet, prints one progress line per
// URL and returns a new URLSet with the same namespace that holds only the
// locations that answered 200 OK, in the order they had in urlSet.
//
// A location that occurs more than once in urlSet is kept at every occurrence
// if at least one of its checks succeeded.
func filterValidURLs(urlSet URLSet) URLSet {
	// The buffer has room for every message, so a worker never blocks on send.
	results := make(chan string, len(urlSet.URL))
	var validURLs []string

	// Workers are started from their own goroutine so that progress lines can
	// be printed while later checks are still waiting for a free slot.
	go func() {
		slots := make(chan struct{}, maxConcurrentChecks)
		for _, u := range urlSet.URL {
			slots <- struct{}{}
			go func(loc string) {
				defer func() { <-slots }()
				checkURL(loc, results, &validURLs)
			}(u.Loc)
		}
	}()

	for range urlSet.URL {
		fmt.Println(<-results)
	}

	// checkURL records a valid URL before it sends its message, so once every
	// message has been received validURLs is complete and safe to read.
	valid := make(map[string]bool, len(validURLs))
	for _, loc := range validURLs {
		valid[loc] = true
	}

	filtered := URLSet{XMLNs: urlSet.XMLNs}
	for _, u := range urlSet.URL {
		if valid[u.Loc] {
			filtered.URL = append(filtered.URL, URL{Loc: u.Loc})
		}
	}
	return filtered
}

// readXMLFromResponse returns the bytes of the response body. Only bytes that
// were really read are returned. A read error is logged and whatever was read
// before it is returned. Closing the body is left to the caller.
func readXMLFromResponse(resp *http.Response) []byte {
	var buf bytes.Buffer
	if _, err := buf.ReadFrom(resp.Body); err != nil {
		log.Printf("Response body cannot be read completely: %s", err)
	}
	return buf.Bytes()
}

// checkURL requests url and sends exactly one message to c: the error text if
// the request failed, otherwise the response code. When the response code is
// 200 the url is appended to validURLs before the message is sent, so a
// receiver that got the message also sees the append.
func checkURL(url string, c chan string, validURLs *[]string) {
	resp, err := httpClient.Get(url)
	if err != nil {
		c <- err.Error()
		return
	}
	// Only the status code is needed; release the connection right away.
	resp.Body.Close()

	if resp.StatusCode == http.StatusOK {
		validURLsMu.Lock()
		*validURLs = append(*validURLs, url)
		validURLsMu.Unlock()
	}
	c <- fmt.Sprintf("Response code is %d for %s", resp.StatusCode, url)
}

// saveValidSiteMap writes the XML header followed by data to filename,
// creating the file or truncating an existing one. It returns the first error
// met while opening, writing or closing the file.
func saveValidSiteMap(filename string, data []byte) error {
	file, err := os.OpenFile(filename, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644)
	if err != nil {
		return err
	}
	if _, err := file.Write([]byte(xml.Header)); err != nil {
		file.Close()
		return err
	}
	if _, err := file.Write(data); err != nil {
		file.Close()
		return err
	}
	// Close can report a delayed write failure, so its error is returned.
	return file.Close()
}

// help prints the usage text and ends the process with exit status 1.
func help() {
	fmt.Print(
		`You have to type sitemap url and output file name
Usage: checker http://sitename.com/sitemap.xml sitemap.xml
`,
	)
	os.Exit(1)
}