Skip to content

Commit

Permalink
Add support for scraping AtCoder contest samples
Browse files Browse the repository at this point in the history
  • Loading branch information
yaegashi committed Jun 17, 2020
1 parent ae2334d commit 4570f9b
Show file tree
Hide file tree
Showing 4 changed files with 135 additions and 87 deletions.
92 changes: 92 additions & 0 deletions cmd/contest-cli/atcoder.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
package main

import (
"fmt"
"io/ioutil"
"log"
"net/http"
"net/url"
"path/filepath"
"strings"

"github.com/PuerkitoBio/goquery"
"github.com/yaegashi/contest.go/tester"
)

// AtCoderBaseURL is the URL prefix under which AtCoder contest pages live.
const AtCoderBaseURL = "https://atcoder.jp/contests/"

// createAtCoderTask fetches the task page at taskURL, creates a solution
// directory at outDir with tester.CreateDirectory, and stores every sample
// found on the page as sample<N>.in.txt / sample<N>.out.txt inside outDir.
//
// A sample is any <section> whose first <h3> starts with "Sample Input " or
// "Sample Output "; the text after that prefix becomes <N> and the text of the
// section's first <pre> becomes the file content.
//
// It returns an error when the page cannot be fetched or parsed, when the
// directory cannot be created, or when a sample cannot be written. Writing is
// best-effort: every sample is attempted and the first failure is returned.
func createAtCoderTask(outDir string, taskURL string) error {
	res, err := http.Get(taskURL)
	if err != nil {
		return err
	}
	defer res.Body.Close()
	if res.StatusCode != http.StatusOK {
		return fmt.Errorf("Failed to fetch %s: %s", taskURL, res.Status)
	}
	doc, err := goquery.NewDocumentFromReader(res.Body)
	if err != nil {
		return err
	}
	opts := &tester.DirectoryOptions{
		OmitSample: true,
		Preamble:   fmt.Sprintf("// Solution for %s", taskURL),
	}
	if err := tester.CreateDirectory(outDir, opts); err != nil {
		return err
	}

	// Heading prefixes recognised as samples and the file-name infix each maps to.
	kinds := []struct{ prefix, infix string }{
		{"Sample Input ", "in"},
		{"Sample Output ", "out"},
	}

	// Each cannot propagate errors, so remember the first one for the caller.
	var firstErr error
	doc.Find("section").Each(func(_ int, s *goquery.Selection) {
		h3 := s.Find("h3").First().Text()
		for _, k := range kinds {
			if !strings.HasPrefix(h3, k.prefix) {
				continue
			}
			suffix := strings.TrimPrefix(h3, k.prefix)
			// The suffix comes from a remote page; never let it escape outDir.
			if strings.ContainsAny(suffix, `/\`) {
				if firstErr == nil {
					firstErr = fmt.Errorf("unsafe sample name %q in %s", h3, taskURL)
				}
				return
			}
			pre := s.Find("pre").First().Text()
			fn := filepath.Join(outDir, fmt.Sprintf("sample%s.%s.txt", suffix, k.infix))
			if err := ioutil.WriteFile(fn, []byte(pre), 0644); err != nil {
				if firstErr == nil {
					firstErr = err
				}
				return
			}
			log.Println("I: Created", fn)
			return
		}
	})
	return firstErr
}

// createAtCoderContest fetches the contest task list at tasksURL and calls
// createAtCoderTask once for every task linked from it.
//
// The task list is the table that shares a parent <div> with an <h2> whose
// text is exactly "Tasks". For each table row, the link in the first cell
// supplies the task label (its text) and the task page (its href, resolved
// against tasksURL). Each task is created in a sub-directory of outDir named
// after the lower-cased label followed by "1".
//
// It returns an error when the list cannot be fetched or parsed, or when one
// or more tasks fail. Task creation is best-effort: every task is attempted
// and all failures are reported together in the returned error.
func createAtCoderContest(outDir string, tasksURL string) error {
	base, err := url.Parse(tasksURL)
	if err != nil {
		return err
	}
	res, err := http.Get(tasksURL)
	if err != nil {
		return err
	}
	defer res.Body.Close()
	if res.StatusCode != http.StatusOK {
		return fmt.Errorf("Failed to fetch %s: %s", tasksURL, res.Status)
	}
	doc, err := goquery.NewDocumentFromReader(res.Body)
	if err != nil {
		// Without this check a parse failure leaves doc nil and Find panics.
		return err
	}

	// Each cannot propagate errors, so collect per-task failures here.
	var failures []string
	doc.Find("div > h2:first-child").Each(func(_ int, heading *goquery.Selection) {
		if heading.Text() != "Tasks" {
			return
		}
		heading.Parent().Find("tbody tr").Each(func(_ int, row *goquery.Selection) {
			link := row.Find("td").First().Find("a")
			label := link.Text()
			href, ok := link.Attr("href")
			if label == "" || !ok {
				// Not a task row (e.g. no link in the first cell).
				return
			}
			ref, err := url.Parse(href)
			if err != nil {
				failures = append(failures, fmt.Sprintf("%s: %s", href, err))
				return
			}
			// NOTE(review): the "1" suffix presumably marks a first attempt
			// directory (e.g. "a1"); kept as in the original — confirm intent.
			dir := strings.ToLower(label) + "1"
			taskURL := base.ResolveReference(ref).String()
			if err := createAtCoderTask(filepath.Join(outDir, dir), taskURL); err != nil {
				failures = append(failures, fmt.Sprintf("%s: %s", taskURL, err))
			}
		})
	})
	if len(failures) > 0 {
		return fmt.Errorf("%d task(s) failed: %s", len(failures), strings.Join(failures, "; "))
	}
	return nil
}
102 changes: 16 additions & 86 deletions cmd/contest-cli/main.go
Original file line number Diff line number Diff line change
@@ -1,101 +1,31 @@
package main

import (
"io/ioutil"
"log"
"os"
"path/filepath"
)

const mainGo = `package main
import (
"bufio"
"fmt"
"io"
"os"
)
type contest struct {
in io.Reader
out io.Writer
}
func (con *contest) Scan(a ...interface{}) (int, error) {
return fmt.Fscan(con.in, a...)
}
func (con *contest) Scanln(a ...interface{}) (int, error) {
return fmt.Fscanln(con.in, a...)
}
func (con *contest) Scanf(f string, a ...interface{}) (int, error) {
return fmt.Fscanf(con.in, f, a...)
}
func (con *contest) Print(a ...interface{}) (int, error) {
return fmt.Fprint(con.out, a...)
}
func (con *contest) Println(a ...interface{}) (int, error) {
return fmt.Fprintln(con.out, a...)
}
func (con *contest) Printf(f string, a ...interface{}) (int, error) {
return fmt.Fprintf(con.out, f, a...)
}
func main() {
in := bufio.NewReader(os.Stdin)
out := bufio.NewWriter(os.Stdout)
defer out.Flush()
con := &contest{in: in, out: out}
con.main()
}
func (con *contest) main() error {
var s string
con.Scan(&s)
con.Println("hello,", s)
return nil
}
`

const mainTestGo = `package main
import (
"io"
"testing"
"strings"

"github.com/yaegashi/contest.go/tester"
)

func TestContest(t *testing.T) {
tester.Run(t, "*.txt", func(in io.Reader, out io.Writer) error {
con := &contest{in: in, out: out}
return con.main()
})
}
`

const testCaseTxt = "go\n--\nhello, go\n"

func main() {
for _, dir := range os.Args[1:] {
err := os.MkdirAll(dir, 0755)
if err != nil {
log.Fatal(err)
}
log.Printf("Created %s", dir)

files := []struct {
path, content string
}{
{"main.go", mainGo},
{"main_test.go", mainTestGo},
{"testcase00.txt", testCaseTxt},
}
for _, file := range files {
path := filepath.Join(dir, file.path)
err := ioutil.WriteFile(path, []byte(file.content), 0644)
if err != nil {
log.Fatal(err)
for _, arg := range os.Args[1:] {
var err error
if strings.HasPrefix(arg, AtCoderBaseURL) {
dir := strings.Trim(arg[len(AtCoderBaseURL):], "/")
problem := strings.Split(dir, "/")[0]
if problem == "" {
log.Printf("E: Wrong AtCoder URL: %s", arg)
continue
}
log.Printf("Created %s", path)
tasksURL := AtCoderBaseURL + filepath.Join(problem, "tasks")
err = createAtCoderContest(problem, tasksURL)
} else {
err = tester.CreateDirectory(arg, nil)
}
if err != nil {
log.Printf("E: Failed to process %s: %s", arg, err)
}
}
}
6 changes: 5 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,8 @@ module github.com/yaegashi/contest.go

go 1.12

require github.com/pkg/diff v0.0.0-20190930165518-531926345625
require (
github.com/PuerkitoBio/goquery v1.5.1
github.com/pkg/diff v0.0.0-20190930165518-531926345625
github.com/stretchr/testify v1.6.1 // indirect
)
22 changes: 22 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
@@ -1,3 +1,25 @@
github.com/PuerkitoBio/goquery v1.5.1 h1:PSPBGne8NIUWw+/7vFBV+kG2J/5MOjbzc7154OaKCSE=
github.com/PuerkitoBio/goquery v1.5.1/go.mod h1:GsLWisAFVj4WgDibEWF4pvYnkVQBpKBKeU+7zCJoLcc=
github.com/andybalholm/cascadia v1.1.0 h1:BuuO6sSfQNFRu1LppgbD25Hr2vLYW25JvxHs5zzsLTo=
github.com/andybalholm/cascadia v1.1.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y=
github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/pkg/diff v0.0.0-20190930165518-531926345625 h1:b5m9ubdpxvfhiJnF64/W1rUTSUOzKHipjy5wOWsZCBM=
github.com/pkg/diff v0.0.0-20190930165518-531926345625/go.mod h1:kFj35MyHn14a6pIgWhm46KVjJr5CHys3eEYxkuKD1EI=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/sergi/go-diff v1.0.0 h1:Kpca3qRNrduNnOQeazBd0ysaKrUJiIuISHxogkT9RPQ=
github.com/sergi/go-diff v1.0.0/go.mod h1:0CfEIISq7TuYL3j771MWULgwwjU+GofnZX9QAmXWZgo=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.6.1 h1:hDPOHmpOpP40lSULcqw7IrRb/u7w6RpDC9399XyoNd0=
github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20200202094626-16171245cfb2 h1:CCH4IOTTfewWjGOlSp+zGcjutRKlBEZQ6wTn8ozI/nI=
golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

0 comments on commit 4570f9b

Please sign in to comment.