Skip to content

Commit

Permalink
DAB-1528 feat: add shared code from recommendations DAGs (#18)
Browse files Browse the repository at this point in the history
* DAB-1528 feat: add dump_object_to_s3 and load_object_from_s3 to utils

* DAB-1528 feat: update version to 0.3.0

* added is_acceptable_recommendation to recommendations_utils
  • Loading branch information
dym-ok authored Dec 1, 2023
1 parent 86f9e3e commit a8ea4f2
Show file tree
Hide file tree
Showing 6 changed files with 268 additions and 3 deletions.
21 changes: 21 additions & 0 deletions ds_toolkit/recommendations_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,27 @@
from typing import Any, Optional

import numpy as np
from geopy.distance import distance


def is_acceptable_recommendation(
    source_listing: dict, max_geo_distance: float, target_listing: dict
) -> bool:
    """
    Decide whether a target listing may be recommended for a source listing

    A target is acceptable when it is active, shares at least one category
    with the source and lies within ``max_geo_distance`` kilometres of it.

    :param source_listing: Listing the recommendation is made for; the keys
        read are ``CATEGORIES`` (comma-separated string), ``LATITUDE`` and
        ``LONGITUDE``
    :param max_geo_distance: Maximum allowed distance in kilometres
        (inclusive)
    :param target_listing: Candidate listing; the keys read are
        ``IS_ACTIVE``, ``CATEGORIES``, ``LATITUDE`` and ``LONGITUDE``
    :return: True if the target is acceptable, False otherwise
    """
    # Cheap checks first: the distance computation below is by far the most
    # expensive step, so it is skipped for candidates that are rejected anyway.
    if not target_listing["IS_ACTIVE"]:
        return False

    # NOTE(review): category tokens are compared verbatim (no whitespace
    # stripping or case folding) - confirm the upstream format is "A,B,C".
    source_categories = set(source_listing["CATEGORIES"].split(","))
    target_categories = set(target_listing["CATEGORIES"].split(","))
    if source_categories.isdisjoint(target_categories):
        return False

    geo_dist = distance(
        (source_listing["LATITUDE"], source_listing["LONGITUDE"]),
        (target_listing["LATITUDE"], target_listing["LONGITUDE"]),
    ).km

    # bool() keeps the return type a plain bool even when the comparison
    # yields another truthy/falsy type (e.g. a numpy boolean).
    return bool(geo_dist <= max_geo_distance)


def get_cosine_similarity(source_vector, item_representations):
Expand Down
42 changes: 42 additions & 0 deletions ds_toolkit/utils.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,49 @@
import pickle
from io import BytesIO
from typing import Any

from botocore.client import BaseClient
from datadog import initialize
from datadog.api.metrics import Metric


def send_datadog_metric(options, *args, **kwargs):
    """
    Initialise the Datadog client and send one metric
    :param options: Mapping unpacked as keyword arguments into ``initialize``
    :param args: Positional arguments forwarded to ``Metric.send``
    :param kwargs: Keyword arguments forwarded to ``Metric.send``
    :return: None
    """
    # The client is (re)initialised on every call with the given options
    # before the metric is submitted.
    initialize(**options)
    Metric.send(*args, **kwargs)


def dump_object_to_s3(client: BaseClient, obj: Any, bucket: str, key: str):
    """
    Pickle an object and upload the resulting bytes to S3
    :param client: S3 client used for the upload
    :param obj: Object to serialise with pickle
    :param bucket: Name of the destination S3 bucket
    :param key: Destination S3 key
    :return: None
    """
    # Stage the pickled bytes in an in-memory stream, then rewind it so the
    # whole payload can be read back for the upload.
    stream = BytesIO()
    stream.write(pickle.dumps(obj))
    stream.seek(0)
    payload = stream.read()

    client.put_object(Body=payload, Bucket=bucket, Key=key)


def load_object_from_s3(client: BaseClient, bucket: str, key: str) -> Any:
    """
    Download a pickled object from S3 and deserialise it

    SECURITY: the payload is passed to ``pickle.loads``, which can execute
    arbitrary code - only use this with buckets/keys whose content is trusted.

    :param client: S3 client used for the download
    :param bucket: Name of the source S3 bucket
    :param key: Source S3 key
    :return: The unpickled object
    """
    response = client.get_object(Bucket=bucket, Key=key)
    # Read the whole response body into memory before unpickling.
    raw_bytes = response["Body"].read()
    return pickle.loads(raw_bytes)
110 changes: 109 additions & 1 deletion poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 3 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
[tool.poetry]
name = "ds-toolkit"
version = "0.2.4"
version = "0.3.0"
description = "Utility package for SMG Real Estate DS team"
authors = ["Dmytro Yurchenko <dmytro.yurchenko@swissmarketplace.group>"]
license = "PROPRIETARY"
readme = "README.md"
packages = [{include = "ds_toolkit"}]

[tool.poetry.dependencies]
botocore = "^1.31.0"
geopy = "^2.4.0"
python = ">=3.8,<3.11"
numpy = "^1.24.0"
datadog = "^0.47.0"
Expand Down
45 changes: 45 additions & 0 deletions tests/test_recommendation_utils.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import json
from unittest import mock

import numpy as np

Expand All @@ -11,11 +12,55 @@
flatten_hgrets,
get_category_code,
get_recommendations_ordered_by_distance,
is_acceptable_recommendation,
isnull,
normalise_price,
)


@mock.patch("ds_toolkit.recommendations_utils.distance")
def test_is_acceptable_recommendation(mock_distance):
    """Check the active, category-overlap and distance rules one at a time."""
    # Every distance(...) call made by the function under test reports 5 km,
    # regardless of the coordinates in the fixtures below.
    mock_distance.return_value.km = 5.0

    source = {
        "LISTING_ID": 1,
        "LATITUDE": 47.3769,
        "LONGITUDE": 8.5417,
        "CATEGORIES": "HOUSE,SINGLE_HOUSE",
        "IS_ACTIVE": True,
    }
    target = {
        "LISTING_ID": 2,
        "LATITUDE": 43.3769,
        "LONGITUDE": 8.5417,
        "CATEGORIES": "HOUSE",
        "IS_ACTIVE": True,
    }
    inactive_target = {**target, "IS_ACTIVE": False}
    unrelated_target = {**target, "CATEGORIES": "GARAGE"}

    # Active, shared category, 5 km <= 10 km
    assert is_acceptable_recommendation(source, 10.0, target) is True
    # Inactive target is rejected
    assert (
        is_acceptable_recommendation(source, 10.0, inactive_target) is False
    )
    # No category in common with the source
    assert (
        is_acceptable_recommendation(source, 10.0, unrelated_target) is False
    )
    # 5 km exceeds the 3 km limit
    assert is_acceptable_recommendation(source, 3.0, target) is False


def test_get_recommendations_ordered_by_distance():
recommended_listing_ids_map = {
1: [(20, 0.1), (30, 0.3)],
Expand Down
49 changes: 48 additions & 1 deletion tests/test_utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
from unittest import mock

from ds_toolkit.utils import send_datadog_metric
from ds_toolkit.utils import (
dump_object_to_s3,
load_object_from_s3,
send_datadog_metric,
)


@mock.patch("ds_toolkit.utils.initialize")
Expand All @@ -21,3 +25,46 @@ def test_send_datadog_metric(mock_send, mock_initialize):
tags=["test:1"], metric="test.metric", points=[(1, 1)]
)
mock_initialize.assert_called_once_with(**datadog_options)


@mock.patch("ds_toolkit.utils.pickle.dumps")
@mock.patch("ds_toolkit.utils.BytesIO")
def test_dump_object_to_s3(mock_bytesio, mock_dumps):
    """Verify the pickle -> buffer -> put_object call sequence."""
    # Collaborators handed to the function under test
    client = mock.MagicMock()
    obj = mock.MagicMock()
    bucket = mock.MagicMock()
    key = mock.MagicMock()

    # The patched BytesIO() yields a buffer whose read() gives the payload
    buffer = mock.MagicMock()
    buffer.read.return_value = "pickled_obj"
    mock_bytesio.return_value = buffer
    mock_dumps.return_value = "pickled_obj"

    dump_object_to_s3(client, obj, bucket, key)

    mock_bytesio.assert_called_once_with()
    mock_dumps.assert_called_once_with(obj)
    buffer.write.assert_called_once_with("pickled_obj")
    buffer.seek.assert_called_once_with(0)
    client.put_object.assert_called_once_with(
        Body="pickled_obj", Bucket=bucket, Key=key
    )


@mock.patch("ds_toolkit.utils.pickle.loads")
def test_load_object_from_s3(mock_loads):
    """Verify the get_object -> read -> pickle.loads call sequence."""
    client = mock.MagicMock()
    bucket = mock.MagicMock()
    key = mock.MagicMock()

    # get_object returns a response whose "Body" reads as the pickled payload
    body = mock.MagicMock()
    body.read.return_value = "pickled_obj"
    client.get_object.return_value = {"Body": body}
    mock_loads.return_value = "unpickled_obj"

    loaded = load_object_from_s3(client, bucket, key)

    assert loaded == "unpickled_obj"
    client.get_object.assert_called_once_with(Bucket=bucket, Key=key)
    body.read.assert_called_once_with()
    mock_loads.assert_called_once_with("pickled_obj")

0 comments on commit a8ea4f2

Please sign in to comment.