From 94cd4a992388c05945f2d5e4fdb41d5eaa0a9a90 Mon Sep 17 00:00:00 2001 From: midir99 Date: Sun, 24 Jul 2022 01:40:14 -0500 Subject: [PATCH] Names in "Has visto a" alerts from Guerrero were not title-cased; this is now fixed. Custom alerts from Morelos now also scrape the publication date --- cmd/cmd.go | 2 +- ws/gro.go | 2 +- ws/mor.go | 13 ++++++++----- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/cmd/cmd.go b/cmd/cmd.go index 4a27867..ac702d3 100644 --- a/cmd/cmd.go +++ b/cmd/cmd.go @@ -129,7 +129,7 @@ func ParseArgs() (*Args, error) { } func PrintVersion() { - fmt.Println("rastreadora v0.3.0 ") + fmt.Println("rastreadora v0.3.1") } func SelectScraperFuncs(scraper Scraper) (func(*html.Node) []mpp.MissingPersonPoster, func(uint64) string, error) { diff --git a/ws/gro.go b/ws/gro.go index 939c2a9..519e19f 100644 --- a/ws/gro.go +++ b/ws/gro.go @@ -160,7 +160,7 @@ func ScrapeGroHasVistoAAlerts(doc *html.Node) []mpp.MissingPersonPoster { mpps := []mpp.MissingPersonPoster{} for _, figure := range QueryAll(doc, "figure") { h4 := Query(figure, "h4") - mpName := h4.FirstChild.Data + mpName := cases.Title(language.LatinAmericanSpanish).String(h4.FirstChild.Data) missingDate, _ := time.Parse("2006-01-02", h4.LastChild.Data) postUrl := AttrOr(Query(figure, "a"), "href", "") if postUrl == "" { diff --git a/ws/mor.go b/ws/mor.go index 789d047..e4b78c2 100644 --- a/ws/mor.go +++ b/ws/mor.go @@ -46,15 +46,15 @@ func ParseMorDate(value string) (time.Time, error) { case "diciembre": month = time.December default: - return time.Time{}, fmt.Errorf("unable to parse date %s", value) + return time.Time{}, fmt.Errorf("unable to parse date %s (unknown month: %s)", value, month) } - day, err := strconv.Atoi(date[DAY_INDEX]) + day, err := strconv.Atoi(strings.TrimSpace(strings.Replace(date[DAY_INDEX], ",", "", 1))) if err != nil { - return time.Time{}, fmt.Errorf("unable to parse date %s", value) + return time.Time{}, fmt.Errorf("unable to parse date %s (invalid day 
number: %s)", value, date[DAY_INDEX]) } year, err := strconv.Atoi(date[YEAR_INDEX]) if err != nil { - return time.Time{}, fmt.Errorf("unable to parse date %s", value) + return time.Time{}, fmt.Errorf("unable to parse date %s (invalid year number: %s)", value, date[YEAR_INDEX]) } return time.Date(year, month, day, 0, 0, 0, 0, time.UTC), nil } @@ -113,7 +113,10 @@ func ScrapeMorCustomAlerts(doc *html.Node) []mpp.MissingPersonPoster { continue } poPostPublicationDate, _ := ParseMorDate(strings.TrimSpace(Query(article, "span").FirstChild.Data)) - poPosterUrl, _ := url.Parse(strings.TrimSpace(AttrOr(Query(article, "img"), "src", ""))) + posterUrl := strings.TrimSpace(AttrOr(Query(article, "img"), "src", "")) + posterUrl = strings.Replace(posterUrl, "-300x225", "", 1) + posterUrl = strings.Replace(posterUrl, "-300x240", "", 1) + poPosterUrl, _ := url.Parse(posterUrl) mpps = append(mpps, mpp.MissingPersonPoster{ MpName: mpName, PoPosterUrl: poPosterUrl,