From 4570f9b5c93fa9013c0b57b4965d112c7aaf0e31 Mon Sep 17 00:00:00 2001
From: YAEGASHI Takeshi
Date: Wed, 17 Jun 2020 13:26:01 +0900
Subject: [PATCH] Add support for scraping AtCoder contest samples

---
 cmd/contest-cli/atcoder.go | 117 ++++++++++++++++++++++++++++++++++++++++++
 cmd/contest-cli/main.go    | 107 +++++++-------------------------------
 go.mod                     |   6 ++-
 go.sum                     |  22 ++++++++
 4 files changed, 164 insertions(+), 88 deletions(-)
 create mode 100644 cmd/contest-cli/atcoder.go

diff --git a/cmd/contest-cli/atcoder.go b/cmd/contest-cli/atcoder.go
new file mode 100644
index 0000000..abc0125
--- /dev/null
+++ b/cmd/contest-cli/atcoder.go
@@ -0,0 +1,117 @@
+package main
+
+import (
+	"fmt"
+	"io/ioutil"
+	"log"
+	"net/http"
+	"net/url"
+	"path/filepath"
+	"strings"
+
+	"github.com/PuerkitoBio/goquery"
+	"github.com/yaegashi/contest.go/tester"
+)
+
+// AtCoderBaseURL is the URL prefix used to recognize and build AtCoder
+// contest URLs.
+const AtCoderBaseURL = "https://atcoder.jp/contests/"
+
+// fetchDocument downloads pageURL and parses the response body as HTML.
+// It fails on transport errors, on any status other than 200 OK and on
+// bodies that cannot be parsed.
+func fetchDocument(pageURL string) (*goquery.Document, error) {
+	res, err := http.Get(pageURL)
+	if err != nil {
+		return nil, err
+	}
+	defer res.Body.Close()
+	if res.StatusCode != http.StatusOK {
+		return nil, fmt.Errorf("Failed to fetch %s: %s", pageURL, res.Status)
+	}
+	return goquery.NewDocumentFromReader(res.Body)
+}
+
+// createAtCoderTask creates a solution directory at outDir for the task page
+// at taskURL and saves each "Sample Input N" / "Sample Output N" section of
+// that page as sampleN.in.txt / sampleN.out.txt inside it.
+// It returns the first error hit while fetching the page, creating the
+// directory or writing a sample file.
+func createAtCoderTask(outDir string, taskURL string) error {
+	doc, err := fetchDocument(taskURL)
+	if err != nil {
+		return err
+	}
+	opts := &tester.DirectoryOptions{
+		OmitSample: true,
+		Preamble:   fmt.Sprintf("// Solution for %s", taskURL),
+	}
+	if err := tester.CreateDirectory(outDir, opts); err != nil {
+		return err
+	}
+	// Maps the heading prefix of a sample section to its file name suffix.
+	kinds := []struct{ prefix, suffix string }{
+		{"Sample Input ", ".in.txt"},
+		{"Sample Output ", ".out.txt"},
+	}
+	var writeErr error
+	doc.Find("section").Each(func(i int, s *goquery.Selection) {
+		if writeErr != nil {
+			return // a previous sample could not be written; stop here
+		}
+		h3 := s.Find("h3").First().Text()
+		for _, k := range kinds {
+			if !strings.HasPrefix(h3, k.prefix) {
+				continue
+			}
+			pre := s.Find("pre").First().Text()
+			name := "sample" + strings.TrimPrefix(h3, k.prefix) + k.suffix
+			fn := filepath.Join(outDir, name)
+			if writeErr = ioutil.WriteFile(fn, []byte(pre), 0644); writeErr != nil {
+				return
+			}
+			log.Println("I: Created", fn)
+		}
+	})
+	return writeErr
+}
+
+// createAtCoderContest reads the task list page at tasksURL and runs
+// createAtCoderTask for every task linked from its "Tasks" table. Each task
+// goes to a subdirectory of outDir named after its lower-cased label with
+// "1" appended.
+// A task that fails is logged and skipped so the remaining tasks are still
+// processed; only a failure to load the task list itself is returned.
+func createAtCoderContest(outDir string, tasksURL string) error {
+	u1, err := url.Parse(tasksURL)
+	if err != nil {
+		return err
+	}
+	doc, err := fetchDocument(tasksURL)
+	if err != nil {
+		return err
+	}
+	doc.Find("div > h2:first-child").Each(func(i int, s1 *goquery.Selection) {
+		if s1.Text() != "Tasks" {
+			return
+		}
+		s1.Parent().Find("tbody tr").Each(func(i int, s2 *goquery.Selection) {
+			a := s2.Find("td").First().Find("a")
+			p := a.Text()
+			href, ok := a.Attr("href")
+			if p == "" || !ok {
+				return
+			}
+			u2, err := url.Parse(href)
+			if err != nil {
+				log.Printf("E: Skipping task %s: bad link %q: %s", p, href, err)
+				return
+			}
+			dir := filepath.Join(outDir, strings.ToLower(p)+"1")
+			taskURL := u1.ResolveReference(u2).String()
+			if err := createAtCoderTask(dir, taskURL); err != nil {
+				log.Printf("E: Failed to create %s from %s: %s", dir, taskURL, err)
+			}
+		})
+	})
+	return nil
+}
diff --git a/cmd/contest-cli/main.go b/cmd/contest-cli/main.go
index 9330b32..25af5fe 100644
--- a/cmd/contest-cli/main.go
+++ b/cmd/contest-cli/main.go
@@ -1,101 +1,34 @@
 package main
 
 import (
-	"io/ioutil"
 	"log"
 	"os"
-	"path/filepath"
-)
-
-const mainGo = `package main
-
-import (
-	"bufio"
-	"fmt"
-	"io"
-	"os"
-)
-
-type contest struct {
-	in  io.Reader
-	out io.Writer
-}
-
-func (con *contest) Scan(a ...interface{}) (int, error) {
-	return fmt.Fscan(con.in, a...)
-}
-func (con *contest) Scanln(a ...interface{}) (int, error) {
-	return fmt.Fscanln(con.in, a...)
-}
-func (con *contest) Scanf(f string, a ...interface{}) (int, error) {
-	return fmt.Fscanf(con.in, f, a...)
-}
-func (con *contest) Print(a ...interface{}) (int, error) {
-	return fmt.Fprint(con.out, a...)
-}
-func (con *contest) Println(a ...interface{}) (int, error) {
-	return fmt.Fprintln(con.out, a...)
-}
-func (con *contest) Printf(f string, a ...interface{}) (int, error) {
-	return fmt.Fprintf(con.out, f, a...)
-}
-func main() {
-	in := bufio.NewReader(os.Stdin)
-	out := bufio.NewWriter(os.Stdout)
-	defer out.Flush()
-	con := &contest{in: in, out: out}
-	con.main()
-}
-
-func (con *contest) main() error {
-	var s string
-	con.Scan(&s)
-	con.Println("hello,", s)
-	return nil
-}
-`
-
-const mainTestGo = `package main
-
-import (
-	"io"
-	"testing"
+	"strings"
 
 	"github.com/yaegashi/contest.go/tester"
 )
 
-func TestContest(t *testing.T) {
-	tester.Run(t, "*.txt", func(in io.Reader, out io.Writer) error {
-		con := &contest{in: in, out: out}
-		return con.main()
-	})
-}
-`
-
-const testCaseTxt = "go\n--\nhello, go\n"
-
+// main scrapes every argument that starts with AtCoderBaseURL with
+// createAtCoderContest and hands any other argument to tester.CreateDirectory.
 func main() {
-	for _, dir := range os.Args[1:] {
-		err := os.MkdirAll(dir, 0755)
-		if err != nil {
-			log.Fatal(err)
-		}
-		log.Printf("Created %s", dir)
-
-		files := []struct {
-			path, content string
-		}{
-			{"main.go", mainGo},
-			{"main_test.go", mainTestGo},
-			{"testcase00.txt", testCaseTxt},
-		}
-		for _, file := range files {
-			path := filepath.Join(dir, file.path)
-			err := ioutil.WriteFile(path, []byte(file.content), 0644)
-			if err != nil {
-				log.Fatal(err)
+	for _, arg := range os.Args[1:] {
+		var err error
+		if strings.HasPrefix(arg, AtCoderBaseURL) {
+			dir := strings.Trim(arg[len(AtCoderBaseURL):], "/")
+			problem := strings.Split(dir, "/")[0]
+			if problem == "" {
+				log.Printf("E: Wrong AtCoder URL: %s", arg)
+				continue
 			}
-			log.Printf("Created %s", path)
+			// Build the URL with "/" directly: filepath.Join would use the
+			// OS separator, which is "\" on Windows.
+			tasksURL := AtCoderBaseURL + problem + "/tasks"
+			err = createAtCoderContest(problem, tasksURL)
+		} else {
+			err = tester.CreateDirectory(arg, nil)
+		}
+		if err != nil {
+			log.Printf("E: Failed to process %s: %s", arg, err)
 		}
 	}
 }
diff --git a/go.mod b/go.mod
index 4b06301..8a5b567 100644
--- a/go.mod
+++ b/go.mod
@@ -2,4 +2,8 @@ module github.com/yaegashi/contest.go
 
 go 1.12
 
-require github.com/pkg/diff v0.0.0-20190930165518-531926345625
+require (
+	github.com/PuerkitoBio/goquery v1.5.1
+	github.com/pkg/diff v0.0.0-20190930165518-531926345625
+	github.com/stretchr/testify v1.6.1 // indirect
+)
diff --git a/go.sum b/go.sum index 0ea752a..ed76141 100644 --- a/go.sum +++ b/go.sum @@ -1,3 +1,25 @@ +github.com/PuerkitoBio/goquery v1.5.1 h1:PSPBGne8NIUWw+/7vFBV+kG2J/5MOjbzc7154OaKCSE= +github.com/PuerkitoBio/goquery v1.5.1/go.mod h1:GsLWisAFVj4WgDibEWF4pvYnkVQBpKBKeU+7zCJoLcc= +github.com/andybalholm/cascadia v1.1.0 h1:BuuO6sSfQNFRu1LppgbD25Hr2vLYW25JvxHs5zzsLTo= +github.com/andybalholm/cascadia v1.1.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y= +github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/pkg/diff v0.0.0-20190930165518-531926345625 h1:b5m9ubdpxvfhiJnF64/W1rUTSUOzKHipjy5wOWsZCBM= github.com/pkg/diff v0.0.0-20190930165518-531926345625/go.mod h1:kFj35MyHn14a6pIgWhm46KVjJr5CHys3eEYxkuKD1EI= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/sergi/go-diff v1.0.0 h1:Kpca3qRNrduNnOQeazBd0ysaKrUJiIuISHxogkT9RPQ= github.com/sergi/go-diff v1.0.0/go.mod h1:0CfEIISq7TuYL3j771MWULgwwjU+GofnZX9QAmXWZgo= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.6.1 h1:hDPOHmpOpP40lSULcqw7IrRb/u7w6RpDC9399XyoNd0= +github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20200202094626-16171245cfb2 h1:CCH4IOTTfewWjGOlSp+zGcjutRKlBEZQ6wTn8ozI/nI= +golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 
+golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=