Skip to content

Commit

Permalink
Add random user agent to API requests
Browse files Browse the repository at this point in the history
  • Loading branch information
hg committed Feb 15, 2021
1 parent b6d75bf commit 9b6a764
Show file tree
Hide file tree
Showing 4 changed files with 91 additions and 70 deletions.
20 changes: 6 additions & 14 deletions gateway/airkaz/airkaz.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,7 @@ import (
"github.com/hg/airmon/tm"
influxdb2 "github.com/influxdata/influxdb-client-go/v2"
"github.com/pkg/errors"
"io/ioutil"
"log"
"net/http"
"regexp"
"time"
)
Expand Down Expand Up @@ -44,7 +42,7 @@ func Collect(sender *influx.MeasurementSender) {
if measurements, err := getResponse(client); err == nil {
log.Print("found ", len(measurements), " airkaz measurements")

toSave := make([]measurement, len(measurements))
toSave := make([]*measurement, len(measurements))

for _, meas := range measurements {
if meas.Error != 0 || meas.Status != "active" || meas.Hour != "now" {
Expand All @@ -70,32 +68,26 @@ func Collect(sender *influx.MeasurementSender) {
}
}

func getResponse(client *http.Client) ([]measurement, error) {
resp, err := client.Get("https://airkaz.org/")
// getResponse downloads the airkaz.org front page and decodes the measurement
// JSON that dataRe captures from the page body.
//
// It returns an error when the page cannot be fetched, when dataRe does not
// match the body, or when the captured text is not valid JSON for a list of
// measurements.
func getResponse(client *net.Client) ([]*measurement, error) {
	body, err := client.Get("https://airkaz.org/")
	if err != nil {
		return nil, err
	}

	matches := dataRe.FindSubmatch(body)
	if len(matches) < 2 {
		// err is nil at this point, and errors.Wrap(nil, ...) returns nil, so
		// wrapping here would silently report success with no measurements.
		return nil, errors.New("measurement json not found in response")
	}

	var measurements []*measurement
	if err = json.Unmarshal(matches[1], &measurements); err != nil {
		return nil, errors.Wrap(err, "could not parse response")
	}

	return measurements, nil
}

func saveMeasurement(meas measurement, sender *influx.MeasurementSender) {
func saveMeasurement(meas *measurement, sender *influx.MeasurementSender) {
tags := map[string]string{
"city": meas.City,
"station": meas.Name,
Expand Down
26 changes: 5 additions & 21 deletions gateway/ceb/ceb.go
Original file line number Diff line number Diff line change
@@ -1,15 +1,11 @@
package ceb

import (
"encoding/json"
"github.com/hg/airmon/influx"
"github.com/hg/airmon/net"
"github.com/hg/airmon/tm"
influxdb2 "github.com/influxdata/influxdb-client-go/v2"
"github.com/pkg/errors"
"io/ioutil"
"log"
"net/http"
"strings"
"time"
)
Expand All @@ -33,7 +29,7 @@ func Collect(sender *influx.MeasurementSender) {
for {
if measurements, err := getResponse(client); err == nil {
latest := time.Time{}
toSave := make([]measurement, len(measurements))
toSave := make([]*measurement, len(measurements))

log.Print("found ", len(measurements), " ceb measurements")

Expand Down Expand Up @@ -61,7 +57,7 @@ func Collect(sender *influx.MeasurementSender) {
}
}

func saveMeasurement(ms measurement, sender *influx.MeasurementSender) {
func saveMeasurement(ms *measurement, sender *influx.MeasurementSender) {
endOfFormula := strings.Index(ms.PollutantFull, "-")
if endOfFormula <= 0 {
return
Expand All @@ -83,19 +79,7 @@ func saveMeasurement(ms measurement, sender *influx.MeasurementSender) {
sender.Send(influxdb2.NewPoint("ceb", tags, fields, ms.Date.Time))
}

func getResponse(client *http.Client) ([]measurement, error) {
resp, err := client.Get("https://ceb-uk.kz/map/ajax.php?markers")
if err != nil {
return nil, errors.Wrap(err, "data fetch failed")
}
defer resp.Body.Close()

body, err := ioutil.ReadAll(resp.Body)
if err != nil {
return nil, errors.Wrap(err, "could not read response")
}

var measurements []measurement
err = json.Unmarshal(body, &measurements)
return measurements, err
// getResponse requests the ceb-uk.kz map markers and decodes the JSON reply
// into a slice of measurements.
func getResponse(client *net.Client) (measurements []*measurement, err error) {
	const markersURL = "https://ceb-uk.kz/map/ajax.php?markers"

	err = client.GetJSON(markersURL, &measurements)
	return measurements, err
}
33 changes: 6 additions & 27 deletions gateway/kazhydromet/kazhydromet.go
Original file line number Diff line number Diff line change
@@ -1,14 +1,11 @@
package kazhydromet

import (
"encoding/json"
"github.com/hg/airmon/influx"
"github.com/hg/airmon/net"
influxdb2 "github.com/influxdata/influxdb-client-go/v2"
"github.com/pkg/errors"
"io/ioutil"
"log"
"net/http"
"strconv"
"time"
)
Expand Down Expand Up @@ -51,7 +48,7 @@ type entry struct {
}

type collector struct {
client *http.Client
client *net.Client
sender *influx.MeasurementSender
stations map[int64]*station
lastAt map[string]time.Time
Expand Down Expand Up @@ -168,42 +165,24 @@ func (c *collector) loadMeasurements() ([]*measurement, error) {
url += "?after=" + c.lastRunAt.UTC().Format(time.RFC3339)
}

resp, err := c.client.Get(url)
if err != nil {
return nil, err
}
defer resp.Body.Close()

body, err := ioutil.ReadAll(resp.Body)
if err != nil {
return nil, err
}

var measurements []*measurement
if err = json.Unmarshal(body, &measurements); err == nil {

err := c.client.GetJSON(url, &measurements)
if err == nil {
c.lastRunAt = startedAt
}

return measurements, err
}

func (c *collector) loadStations() error {
resp, err := c.client.Get("http://atmosphera.kz:4004/stations")
if err != nil {
return err
}
defer resp.Body.Close()
var stations []*station

body, err := ioutil.ReadAll(resp.Body)
err := c.client.GetJSON("http://atmosphera.kz:4004/stations", &stations)
if err != nil {
return err
}

var stations []*station
if err = json.Unmarshal(body, &stations); err != nil {
return err
}

c.stations = make(map[int64]*station)

for _, stat := range stations {
Expand Down
82 changes: 74 additions & 8 deletions gateway/net/net.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,25 +2,91 @@ package net

import (
"context"
"encoding/json"
"github.com/pkg/errors"
"golang.org/x/net/proxy"
"io/ioutil"
"math/rand"
"net"
"net/http"
"net/url"
"time"
)

func NewProxiedClient() *http.Client {
// userAgents is the pool of browser User-Agent strings that randomUserAgent
// picks from.
var userAgents = []string{
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.150 Safari/537.36",
"Mozilla/5.0 (iPhone; CPU iPhone OS 14_4 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) CriOS/87.0.4280.77 Mobile/15E148 Safari/604.1",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:85.0) Gecko/20100101 Firefox/85.0",
"Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:85.0) Gecko/20100101 Firefox/85.0",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 11_2_1) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0.2 Safari/605.1.15",
"Mozilla/5.0 (Android 11; Mobile; rv:68.0) Gecko/68.0 Firefox/85.0",
}

// Client wraps an *http.Client. Its Get and GetJSON methods send each request
// with a Referer header and a randomly chosen User-Agent.
type Client struct {
// client is the underlying HTTP client that performs the requests.
client *http.Client
}

// randomUserAgent returns one entry of userAgents, selected with math/rand.
func randomUserAgent() string {
	idx := rand.Intn(len(userAgents))
	return userAgents[idx]
}

func NewProxiedClient() *Client {
proxyDialer := proxy.FromEnvironmentUsing(&net.Dialer{
Timeout: 30 * time.Second,
KeepAlive: 30 * time.Second,
})
return &http.Client{
Transport: &http.Transport{
DialContext: func(ctx context.Context, network, addr string) (net.Conn, error) {
return proxyDialer.Dial(network, addr)
return &Client{
client: &http.Client{
Transport: &http.Transport{
DialContext: func(ctx context.Context, network, addr string) (net.Conn, error) {
return proxyDialer.Dial(network, addr)
},
MaxIdleConns: 1,
IdleConnTimeout: 1 * time.Minute,
TLSHandshakeTimeout: 10 * time.Second,
},
MaxIdleConns: 1,
IdleConnTimeout: 1 * time.Minute,
TLSHandshakeTimeout: 10 * time.Second,
},
}
}

// baseDomain strips everything after the authority from fullUrl, leaving the
// scheme, optional user info and host (for example "https://example.com").
// It returns an empty string when fullUrl cannot be parsed.
func baseDomain(fullUrl string) string {
	parsed, err := url.Parse(fullUrl)
	if err != nil {
		return ""
	}

	// Blank every component that follows the host.
	parsed.Opaque, parsed.Path, parsed.RawQuery = "", "", ""
	parsed.Fragment, parsed.RawFragment = "", ""
	parsed.ForceQuery = false

	return parsed.String()
}

// Get performs an HTTP GET request for uri and returns the whole response
// body.
//
// The request carries a randomly chosen User-Agent and, when uri can be
// parsed, a Referer set to the base of uri itself. An error is returned when
// the request cannot be built or sent, when the response status is outside
// the 2xx range, or when the body cannot be read; no data is returned
// alongside an error.
func (c *Client) Get(uri string) ([]byte, error) {
	req, err := http.NewRequest(http.MethodGet, uri, nil)
	if err != nil {
		return nil, errors.Wrapf(err, "building request for %s", uri)
	}

	if domain := baseDomain(uri); domain != "" {
		req.Header.Set("Referer", domain)
	}
	req.Header.Set("User-Agent", randomUserAgent())

	resp, err := c.client.Do(req)
	if err != nil {
		return nil, errors.Wrapf(err, "GET %s", uri)
	}
	defer resp.Body.Close()

	// Error pages must not reach the parsers as if they were data.
	if resp.StatusCode < http.StatusOK || resp.StatusCode >= http.StatusMultipleChoices {
		return nil, errors.Errorf("GET %s: unexpected status %s", uri, resp.Status)
	}

	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return nil, errors.Wrapf(err, "reading response from %s", uri)
	}
	return body, nil
}

// GetJSON fetches url with Get and decodes the JSON response body into buf.
// buf is passed to json.Unmarshal unchanged, so it must be a non-nil pointer.
//
// It returns the error from Get when the request fails, otherwise the result
// of decoding the body.
func (c *Client) GetJSON(url string, buf interface{}) error {
	data, err := c.Get(url)
	if err != nil {
		// Decode only after a successful fetch; the failure must reach the
		// caller instead of being replaced by a decode error on nil data.
		return err
	}
	return json.Unmarshal(data, buf)
}

0 comments on commit 9b6a764

Please sign in to comment.