Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implementation of Categories Http fetcher #882

Merged
merged 7 commits into from
May 20, 2019
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -500,6 +500,7 @@ func SetupViper(v *viper.Viper, filename string) {
v.SetDefault("datacache.ttl_seconds", 0)
v.SetDefault("category_mapping.filesystem.enabled", true)
v.SetDefault("category_mapping.filesystem.directorypath", "./static/category-mapping")
v.SetDefault("category_mapping.http.endpoint", "")
v.SetDefault("stored_requests.filesystem", false)
v.SetDefault("stored_requests.directorypath", "./stored_requests/data/by_id")
v.SetDefault("stored_requests.postgres.connection.dbname", "")
Expand Down
6 changes: 3 additions & 3 deletions exchange/exchange.go
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ func (e *exchange) HoldAuction(ctx context.Context, bidRequest *openrtb.BidReque
conversions := e.currencyConverter.Rates()

adapterBids, adapterExtra := e.getAllBids(auctionCtx, cleanRequests, aliases, bidAdjustmentFactors, blabels, conversions)
bidCategory, adapterBids, err := applyCategoryMapping(requestExt, adapterBids, *categoriesFetcher, targData)
bidCategory, adapterBids, err := applyCategoryMapping(ctx, requestExt, adapterBids, *categoriesFetcher, targData)
auc := newAuction(adapterBids, len(bidRequest.Imp))
if err != nil {
return nil, fmt.Errorf("Error in category mapping : %s", err.Error())
Expand Down Expand Up @@ -321,7 +321,7 @@ func (e *exchange) buildBidResponse(ctx context.Context, liveAdapters []openrtb_
return bidResponse, err
}

func applyCategoryMapping(requestExt openrtb_ext.ExtRequest, seatBids map[openrtb_ext.BidderName]*pbsOrtbSeatBid, categoriesFetcher stored_requests.CategoryFetcher, targData *targetData) (map[string]string, map[openrtb_ext.BidderName]*pbsOrtbSeatBid, error) {
func applyCategoryMapping(ctx context.Context, requestExt openrtb_ext.ExtRequest, seatBids map[openrtb_ext.BidderName]*pbsOrtbSeatBid, categoriesFetcher stored_requests.CategoryFetcher, targData *targetData) (map[string]string, map[openrtb_ext.BidderName]*pbsOrtbSeatBid, error) {
res := make(map[string]string)

type bidDedupe struct {
Expand Down Expand Up @@ -375,7 +375,7 @@ func applyCategoryMapping(requestExt openrtb_ext.ExtRequest, seatBids map[openrt
continue
} else {
//if unique IAB category is present then translate it to the adserver category based on mapping file
category, err = categoriesFetcher.FetchCategories(primaryAdServer, publisher, bidIabCat[0])
category, err = categoriesFetcher.FetchCategories(ctx, primaryAdServer, publisher, bidIabCat[0])
if err != nil || category == "" {
//TODO: add metrics
//if mapping required but no mapping file is found then discard the bid
Expand Down
4 changes: 2 additions & 2 deletions exchange/exchange_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -587,7 +587,7 @@ func TestCategoryMapping(t *testing.T) {

adapterBids[bidderName1] = &seatBid

bidCategory, adapterBids, err := applyCategoryMapping(requestExt, adapterBids, categoriesFetcher, targData)
bidCategory, adapterBids, err := applyCategoryMapping(nil, requestExt, adapterBids, categoriesFetcher, targData)

assert.Equal(t, nil, err, "Category mapping error should be empty")
assert.Equal(t, "10.00_Electronics_30s", bidCategory["bid_id1"], "Category mapping doesn't match")
Expand Down Expand Up @@ -652,7 +652,7 @@ func TestCategoryDedupe(t *testing.T) {

adapterBids[bidderName1] = &seatBid

bidCategory, adapterBids, err := applyCategoryMapping(requestExt, adapterBids, categoriesFetcher, targData)
bidCategory, adapterBids, err := applyCategoryMapping(nil, requestExt, adapterBids, categoriesFetcher, targData)

assert.Equal(t, nil, err, "Category mapping error should be empty")
assert.Equal(t, 2, len(adapterBids[bidderName1].bids), "Bidders number doesn't match")
Expand Down
2 changes: 1 addition & 1 deletion stored_requests/backends/db_fetcher/fetcher.go
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ func (fetcher *dbFetcher) FetchRequests(ctx context.Context, requestIDs []string
return storedRequestData, storedImpData, errs
}

func (fetcher *dbFetcher) FetchCategories(primaryAdServer, publisherId, iabCategory string) (string, error) {
func (fetcher *dbFetcher) FetchCategories(ctx context.Context, primaryAdServer, publisherId, iabCategory string) (string, error) {
return "", nil
}

Expand Down
2 changes: 1 addition & 1 deletion stored_requests/backends/empty_fetcher/fetcher.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,6 @@ func (fetcher EmptyFetcher) FetchRequests(ctx context.Context, requestIDs []stri
return
}

func (fetcher EmptyFetcher) FetchCategories(primaryAdServer, publisherId, iabCategory string) (string, error) {
func (fetcher EmptyFetcher) FetchCategories(ctx context.Context, primaryAdServer, publisherId, iabCategory string) (string, error) {
return "", nil
}
13 changes: 4 additions & 9 deletions stored_requests/backends/file_fetcher/fetcher.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,7 @@ func NewFileFetcher(directory string) (stored_requests.AllFetcher, error) {

type eagerFetcher struct {
FileSystem FileSystem
Categories map[string]map[string]Category
}

type Category struct {
Id string
Name string
Categories map[string]map[string]stored_requests.Category
}

func (fetcher *eagerFetcher) FetchRequests(ctx context.Context, requestIDs []string, impIDs []string) (map[string]json.RawMessage, map[string]json.RawMessage, []error) {
Expand All @@ -38,15 +33,15 @@ func (fetcher *eagerFetcher) FetchRequests(ctx context.Context, requestIDs []str
return storedRequests, storedImpressions, errs
}

func (fetcher *eagerFetcher) FetchCategories(primaryAdServer, publisherId, iabCategory string) (string, error) {
func (fetcher *eagerFetcher) FetchCategories(ctx context.Context, primaryAdServer, publisherId, iabCategory string) (string, error) {
fileName := primaryAdServer

if len(publisherId) != 0 {
fileName = primaryAdServer + "_" + publisherId
}

if fetcher.Categories == nil {
fetcher.Categories = make(map[string]map[string]Category)
fetcher.Categories = make(map[string]map[string]stored_requests.Category)
}
if data, ok := fetcher.Categories[fileName]; ok {
return data[iabCategory].Id, nil
Expand All @@ -56,7 +51,7 @@ func (fetcher *eagerFetcher) FetchCategories(primaryAdServer, publisherId, iabCa

if file, ok := primaryAdServerDir.Files[fileName]; ok {

tmp := make(map[string]Category)
tmp := make(map[string]stored_requests.Category)

if err := json.Unmarshal(file, &tmp); err != nil {
return "", fmt.Errorf("Unable to unmarshal categories for adserver: '%s', publisherId: '%s'", primaryAdServer, publisherId)
Expand Down
10 changes: 5 additions & 5 deletions stored_requests/backends/file_fetcher/fetcher_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ func TestCategoriesFetcherWithPublisher(t *testing.T) {
if err != nil {
t.Errorf("Failed to create a category Fetcher: %v", err)
}
category, err := fetcher.FetchCategories("test", "categories", "IAB1-1")
category, err := fetcher.FetchCategories(nil, "test", "categories", "IAB1-1")
assert.Equal(t, nil, err, "Categories were loaded incorrectly")
assert.Equal(t, "Beverages", category, "Categories were loaded incorrectly")
}
Expand All @@ -124,7 +124,7 @@ func TestCategoriesFetcherWithoutPublisher(t *testing.T) {
if err != nil {
t.Errorf("Failed to create a category Fetcher: %v", err)
}
category, err := fetcher.FetchCategories("test", "", "IAB1-1")
category, err := fetcher.FetchCategories(nil, "test", "", "IAB1-1")
assert.Equal(t, nil, err, "Categories were loaded incorrectly")
assert.Equal(t, "VideoGames", category, "Categories were loaded incorrectly")
}
Expand All @@ -134,7 +134,7 @@ func TestCategoriesFetcherNoCategory(t *testing.T) {
if err != nil {
t.Errorf("Failed to create a category Fetcher: %v", err)
}
_, fetchingErr := fetcher.FetchCategories("test", "", "IAB1-100")
_, fetchingErr := fetcher.FetchCategories(nil, "test", "", "IAB1-100")
assert.Equal(t, fmt.Errorf("Unable to find category for adserver 'test', publisherId: '', iab category: 'IAB1-100'"),
fetchingErr, "Categories were loaded incorrectly")
}
Expand All @@ -144,7 +144,7 @@ func TestCategoriesFetcherBrokenJson(t *testing.T) {
if err != nil {
t.Errorf("Failed to create a category Fetcher: %v", err)
}
_, fetchingErr := fetcher.FetchCategories("test", "broken", "IAB1-100")
_, fetchingErr := fetcher.FetchCategories(nil, "test", "broken", "IAB1-100")
assert.Equal(t, fmt.Errorf("Unable to unmarshal categories for adserver: 'test', publisherId: 'broken'"),
fetchingErr, "Categories were loaded incorrectly")
}
Expand All @@ -154,7 +154,7 @@ func TestCategoriesFetcherNoCategoriesFile(t *testing.T) {
if err != nil {
t.Errorf("Failed to create a category Fetcher: %v", err)
}
_, fetchingErr := fetcher.FetchCategories("test", "not_exists", "IAB1-100")
_, fetchingErr := fetcher.FetchCategories(nil, "test", "not_exists", "IAB1-100")
assert.Equal(t, fmt.Errorf("Unable to find mapping file for adserver: 'test', publisherId: 'not_exists'"),
fetchingErr, "Categories were loaded incorrectly")
}
57 changes: 52 additions & 5 deletions stored_requests/backends/http_fetcher/fetcher.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,9 +56,10 @@ func NewFetcher(client *http.Client, endpoint string) *HttpFetcher {
}

type HttpFetcher struct {
client *http.Client
Endpoint string
hasQuery bool
client *http.Client
Endpoint string
hasQuery bool
Categories map[string]map[string]stored_requests.Category
}

func (fetcher *HttpFetcher) FetchRequests(ctx context.Context, requestIDs []string, impIDs []string) (requestData map[string]json.RawMessage, impData map[string]json.RawMessage, errs []error) {
Expand All @@ -80,8 +81,54 @@ func (fetcher *HttpFetcher) FetchRequests(ctx context.Context, requestIDs []stri
return
}

func (fetcher *HttpFetcher) FetchCategories(primaryAdServer, publisherId, iabCategory string) (string, error) {
return "", nil
func (fetcher *HttpFetcher) FetchCategories(ctx context.Context, primaryAdServer, publisherId, iabCategory string) (string, error) {
if fetcher.Categories == nil {
fetcher.Categories = make(map[string]map[string]stored_requests.Category)
}

//in NewFetcher function there is a code to add "?" at the end of url
//in case of categories we don't expect to have any parameters, that's why we need to remove "?"
Copy link
Contributor

@josephveach josephveach May 16, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A few comments about this code:

  1. NewFetcher() is capable of returning "&" as the last character as well as "?" (in which case there will be a "?" somewhere earlier in the string). I'm guessing from the above comment that this will never happen for categories. Is that correct? If so, might it be better (i.e., probably more efficient and more readable) to use strings.TrimSuffix() to simply remove the "?" from the end of the string? If "?" is not guaranteed to be at the end of the string, then shouldn't everything from the "?" in the line forward be removed? It might be safest to allow for the possibility of "?" not being at the end of the line (in case of future changes) and use strings.IndexByte() to find the "?" and chop off the end of the line from there; this works regardless of the location of "?".

  2. The url variable is assigned twice when publisherId has a value, both times making the same call to strings.Replace(). The data name variable is also assigned a value twice. It would be a little more efficient to simply declare url and dataName as strings, then use an if-statement to assign each variable once.

  3. Taking efficiency considerations to an extreme, if it's likely that fetcher.Categories[dataName] is likely to be set more often than not (I don't know how one would know this), it might be even more efficient to put off setting the value of url until it is needed, in case we never get there. Drawback to this is it's less efficient (two if-statements needed to check publisherId) when fetcher.Categories[dataName] is unset.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi Joe, @josephveach thank you for your comment!

  1. Please see above discussion with Jason about this. There is a code snipped with logic of adding "?" at the end of URL: Implementation of Categories Http fetcher #882 (comment)
    We don't expect to have URL with parameters for http fetcher, here is how url builder looks like:
url = fmt.Sprintf("%s/%s/%s.json", strings.Replace(fetcher.Endpoint, "?", "", -1), primaryAdServer, publisherId)
  1. Refactored

  2. URL builder is just a simple string concatenation. We can separate it and have 2 publisher id checks. I would leave it as it is now.

Please let me know what do you think.

Copy link
Contributor

@josephveach josephveach May 17, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi Veronika, Let me clarify my thoughts regarding (1):
Looking purely at the NewFetcher() code, it seems that the returned value of Endpoint can be in two forms:
(a) if there is no query string in the original "endpoint" argument to NewFetcher() (i.e., endpoint was of form someURL), then Endpoint is simply
someURL?
which works fine with the given code, but
(b) if "endpoint" contained a query string, i.e., was of form someURL?someQuery, then Endpoint would be
someURL?someQuery&
which I believe does not work properly with the given code.

The comment simply states "we don't expect to have any parameters" in the category case, which does not seem absolute or clear to me (e.g., does "don't expect" mean it's impossible? does "parameters" equate to what would be in the query?). Furthermore, as I've already indicated, the use of strings.Replace() probably doesn't achieve the desired effect on the second form (b) above of the returned value of Endpoint.

IMHO, it would be more clear to someone who is not terribly familiar with the underlying code that gets us here if (1) this code used strings.TrimSuffix() instead of strings.Replace(), which would emphasize the '?' occurs only as the last character and, possibly, (2) if the comment were a little more strong, i.e., something like "in case of categories, there is never a query string in the endpoint so it always ends with '?'", which is more certain than "we don't expect". Actually, looking at previous discussion, I prefer Jason's explanation above: "category fetcher is expecting its variable parts to be in the URL path whereas other fetchers are passing them in URL query params", though maybe "expecting" could be replaced by something stronger in a code comment.
I also expect strings.TrimSuffix() to run faster than strings.Replace() because it doesn't need to examine the entire string, though I haven't confirmed that.

BTW, I see that NewFetcher() also does an unnecessary second assignment to urlPrefix. The first portion of the function could be changed to simply:

   var urlPrefix string
   if strings.Contains(endpoint, "?") {
	urlPrefix = endpoint + "&"
} else {
	urlPrefix = endpoint + "?"
}

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi Joe, strings.Replace is changed to strings.TrimSuffix.
Refactoring existing code in NewFetcher is beyond the scope of this PR and will require to refactor buildRequest function as well.
Also there are no specific requirements to categories URL, so maybe url builder will be changed in future.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My original comment suggested to chop off the end of the line starting at the "?" if there was a chance of the "?" ever not being the last character. You seem to be saying that could be a possibility in the future, in which case this solution could lead to a problem at that time. In any case, I've made my suggestion and I have no other objections so I'll give my approval of the PR.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you Joe!
With current implementation we just want to delete last "?". If it's not there - url is returned unchanged. Please see this documentation: https://golang.org/pkg/strings/#TrimSuffix

url := fmt.Sprintf("%s/%s.json", strings.Replace(fetcher.Endpoint, "?", "", -1), primaryAdServer)

dataName := primaryAdServer

if publisherId != "" {
dataName = fmt.Sprintf("%s_%s", primaryAdServer, publisherId)
url = fmt.Sprintf("%s/%s/%s.json", strings.Replace(fetcher.Endpoint, "?", "", -1), primaryAdServer, publisherId)
}

if data, ok := fetcher.Categories[dataName]; ok {
if val, ok := data[iabCategory]; ok {
return val.Id, nil
} else {
return "", fmt.Errorf("Unable to find category mapping for adserver: '%s', publisherId: '%s'", primaryAdServer, publisherId)
}
}

httpReq, err := http.NewRequest("GET", url, nil)
if err != nil {
return "", err
}

httpResp, err := ctxhttp.Do(ctx, fetcher.client, httpReq)
if err != nil {
return "", err
}
defer httpResp.Body.Close()

respBytes, err := ioutil.ReadAll(httpResp.Body)
tmp := make(map[string]stored_requests.Category)

if err := json.Unmarshal(respBytes, &tmp); err != nil {
return "", fmt.Errorf("Unable to unmarshal categories for adserver: '%s', publisherId: '%s'", primaryAdServer, publisherId)
}
fetcher.Categories[dataName] = tmp

if val, ok := tmp[iabCategory]; ok {
return val.Id, nil
} else {
return "", fmt.Errorf("Unable to find category mapping for adserver: '%s', publisherId: '%s'", primaryAdServer, publisherId)
}
}

func buildRequest(endpoint string, requestIDs []string, impIDs []string) (*http.Request, error) {
Expand Down
11 changes: 8 additions & 3 deletions stored_requests/fetcher.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,13 @@ type Fetcher interface {

type CategoryFetcher interface {
// FetchCategories fetches the ad-server/publisher specific category for the given IAB category
FetchCategories(primaryAdServer, publisherId, iabCategory string) (string, error)
FetchCategories(ctx context.Context, primaryAdServer, publisherId, iabCategory string) (string, error)
}

// AllFetcher is an iterface that encapsulates both the original Fetcher and the CategoryFetcher
type AllFetcher interface {
FetchRequests(ctx context.Context, requestIDs []string, impIDs []string) (requestData map[string]json.RawMessage, impData map[string]json.RawMessage, errs []error)
FetchCategories(primaryAdServer, publisherId, iabCategory string) (string, error)
FetchCategories(ctx context.Context, primaryAdServer, publisherId, iabCategory string) (string, error)
}

// NotFoundError is an error type to flag that an ID was not found by the Fetcher.
Expand All @@ -44,6 +44,11 @@ type NotFoundError struct {
DataType string
}

type Category struct {
Id string
Name string
}

func (e NotFoundError) Error() string {
return fmt.Sprintf(`Stored %s with ID="%s" not found.`, e.DataType, e.ID)
}
Expand Down Expand Up @@ -176,7 +181,7 @@ func (f *fetcherWithCache) FetchRequests(ctx context.Context, requestIDs []strin
return
}

func (f *fetcherWithCache) FetchCategories(primaryAdServer, publisherId, iabCategory string) (string, error) {
func (f *fetcherWithCache) FetchCategories(ctx context.Context, primaryAdServer, publisherId, iabCategory string) (string, error) {
return "", nil
}

Expand Down
2 changes: 1 addition & 1 deletion stored_requests/fetcher_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -215,7 +215,7 @@ func (f *mockFetcher) FetchRequests(ctx context.Context, requestIDs []string, im
return args.Get(0).(map[string]json.RawMessage), args.Get(1).(map[string]json.RawMessage), args.Get(2).([]error)
}

func (f *mockFetcher) FetchCategories(primaryAdServer, publisherId, iabCategory string) (string, error) {
func (f *mockFetcher) FetchCategories(ctx context.Context, primaryAdServer, publisherId, iabCategory string) (string, error) {
return "", nil
}

Expand Down
4 changes: 2 additions & 2 deletions stored_requests/multifetcher.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,10 @@ func (mf MultiFetcher) FetchRequests(ctx context.Context, requestIDs []string, i
return
}

func (mf MultiFetcher) FetchCategories(primaryAdServer, publisherId, iabCategory string) (string, error) {
func (mf MultiFetcher) FetchCategories(ctx context.Context, primaryAdServer, publisherId, iabCategory string) (string, error) {
for _, f := range mf {
if cf, ok := f.(CategoryFetcher); ok {
iabCategory, _ := cf.FetchCategories(primaryAdServer, publisherId, iabCategory)
iabCategory, _ := cf.FetchCategories(ctx, primaryAdServer, publisherId, iabCategory)
if iabCategory != "" {
return iabCategory, nil
}
Expand Down