From ba11501cf49e8feed8799d2c4d6a00a64ef05eff Mon Sep 17 00:00:00 2001 From: windblows95 Date: Sat, 3 Aug 2024 12:59:10 -0400 Subject: [PATCH] search: change endpoint to media-imdb.com, json, faster --- search.go | 153 ++++++++++++++------------------------------- search_test.go | 166 +++++++++++++++++-------------------------------- 2 files changed, 105 insertions(+), 214 deletions(-) diff --git a/search.go b/search.go index dfade4e..6bbdc96 100644 --- a/search.go +++ b/search.go @@ -1,139 +1,80 @@ package imdb import ( + "encoding/json" "errors" "fmt" - "io/ioutil" + "io" "net/http" - "net/url" - "regexp" - "strconv" + "strings" ) -const searchURL = "https://www.imdb.com/find?%s" +const searchURL = "https://v3.sg.media-imdb.com/suggestion/x/%s.json?includeVideos=0" -var ( - titleIDLinkRE = regexp.MustCompile(`(.*?)`) - // searchTitleRE matches on titles. - searchTitleRE = regexp.MustCompile(` 3 { - return nil, fmt.Errorf("search: too many extras") - } - for i, x := range extras { - if i == 0 && searchRomanRE.Match(x[1]) { - continue // ignore roman numerals used for duplicates in a year - } - if digits := searchYearRE.FindSubmatch(x[1]); digits != nil { - year, err := strconv.Atoi(string(digits[0])) - if err != nil { - return nil, err // should not happen as regexp matches digits - } - titleYear = year - } else { - titleType = string(x[1]) - } - } - id := string(r[1]) - t = append(t, Title{ - ID: id, - URL: fmt.Sprintf(titleURL, id), - Name: decode(string(r[2])), - Year: titleYear, - Type: titleType, - }) - } - return t, nil -} - -var ( - // searchTitleListRE matches on each result in the list. - newSearchTitleListRE = regexp.MustCompile(``) - // searchTitleRE matches on titles. - newSearchTitleRE = regexp.MustCompile(` 3 { - t.Errorf("> 3 errors: %v", errors) - } - errors = []string{} - for i, want := range []string{ - "TV Series", - "", - "", - "", - "TV Series", - } { - if r[i].Type != want { - errors = append(errors, fmt.Sprintf("SearchTitle(%s)[%d].Type = %s; want %s", title, i, r[i].Type, want)) + t.Errorf("SearchTitle(\"Letterkenny\") error: %v", err) + } else { + for i, wGot := range got { + if err := diffStruct(wGot, want[i]); err != nil { + t.Errorf("SearchTitle(\"Letterkenny\") error: %v", err) + } } } - if len(errors) > 3 { - t.Errorf("> 3 errors: %v", errors) - } -} - -func TestSearchTitleUnicode(t *testing.T) { - title := "Les Filles De L'Océan" - r, err := SearchTitle(client, title) - if err != nil { - t.Fatalf("SearchTitle(%s) error: %v", title, err) - } - if len(r) == 0 { - t.Fatalf("SearchTitle(%s) len = %d; want %d", title, len(r), 1) - } - if accepted := map[string]bool{ - "tt5761478": true, // Harlots (TV Series) (2017-2019) - "tt0244764": true, // Rip Girls (TV Movie) (2000) - "tt0098797": true, // Les filles de Caleb (TV Series) (1990-) - "tt22522556": true, // Les Filles de l'Océan - }; !accepted[r[0].ID] { - t.Errorf("SearchTitle(%s)[0] = %v; want any of %v", title, r[0].ID, accepted) - } -} - -func TestSearchTitlePositions(t *testing.T) { - title := "Burlesque" - r, err := SearchTitle(client, title) - if err != nil { - t.Fatalf("SearchTitle(%s) error: %v", title, err) - } - if len(r) < 3 { - t.Fatalf("SearchTitle(%s) len = %d; want %d", title, len(r), 1) - } - if accepted := map[string]bool{ - "tt1126591": true, // Burlesque (I) (2010) - "tt1586713": true, // Burlesque (II) (2010) - "tt11288016": true, // Jak si nepodelat zivot (2019) (TV Mini Series) aka "Burlesque" - }; !accepted[r[0].ID] { - t.Errorf("SearchTitle(%s)[0] = %v; want any of %v", title, r[0].ID, accepted) - } -} - -func TestMachete(t *testing.T) { - title := "Machete Kills Again... In Space!" - r, err := SearchTitle(client, title) - if err != nil { - t.Fatalf("SearchTitle(%s) error: %v", title, err) - } - if len(r) == 0 { - t.Fatalf("SearchTitle(%s) len = %d; want > 0", title, len(r)) - } - if accepted := map[string]bool{ - "tt2002719": true, - }; !accepted[r[0].ID] { - t.Errorf("SearchTitle(%s)[0] = %v; want any of %v", title, r[0].ID, accepted) - } }