Skip to content

Commit

Permalink
Introduce functions for user agent parsing
Browse files Browse the repository at this point in the history
  • Loading branch information
lpellegr committed Jun 13, 2024
1 parent d5e9333 commit 0cd403a
Show file tree
Hide file tree
Showing 8 changed files with 216 additions and 42 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased]
### Changed
- Require Python 3.8+.
- Add new functions for user-agent header value parsing: `batch_parse_user_agents`, `parse_user_agent`.
- API key is passed as header value and no longer as query parameter.
- Responses from client library methods are now wrapped in a new _ApiResponse_ object that provides a means to retrieve metadata
about _credits_ and _throttling_ in addition to _data_.
Expand Down
14 changes: 12 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,16 @@ response = client.origin_lookup_ip()
print(response.data)
```

#### User-Agent Parsing

```python
from ipregistry import IpregistryClient

client = IpregistryClient("YOUR_API_KEY")
response = client.parse_user_agent('Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36')
print(response.data)
```

More advanced examples are available in the [samples](https://github.com/ipregistry/ipregistry-python/tree/master/samples)
folder.

Expand Down Expand Up @@ -108,9 +118,9 @@ One way to proceed is to identify bots using the `User-Agent` header.
To ease this process, the library includes a utility method:

```python
from ipregistry import UserAgent
from ipregistry import UserAgents

is_bot = UserAgent.is_bot('YOUR_USER_AGENT_HEADER_VALUE_HERE')
is_bot = UserAgents.is_bot('YOUR_USER_AGENT_HEADER_VALUE_HERE')
```

## Other Libraries
Expand Down
46 changes: 30 additions & 16 deletions ipregistry/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,40 +31,46 @@ def __init__(self, key_or_config, **kwargs):
raise ValueError("Given request handler instance is not of type IpregistryRequestHandler")

def batch_lookup_ips(self, ips, **options):
    """Look up several IP addresses in one call.

    The work is delegated to ``batch_request``, which receives the request
    handler's ``batch_lookup_ips`` function to call.

    :param ips: sequence of IP addresses to look up.
    :param options: extra lookup options, forwarded unchanged.
    :return: whatever ``batch_request`` returns for these items.
    """
    # No local bookkeeping is needed here: batch_request builds its own
    # per-item cache table.
    return self.batch_request(ips, self._requestHandler.batch_lookup_ips, **options)

def batch_parse_user_agents(self, user_agents, **options):
    """Parse several user-agent header values in one call.

    :param user_agents: sequence of raw user-agent strings.
    :param options: extra request options, forwarded unchanged.
    :return: whatever ``batch_request`` returns for these items.
    """
    # Same pipeline as IP lookups; only the handler function differs.
    handler_func = self._requestHandler.batch_parse_user_agents
    return self.batch_request(user_agents, handler_func, **options)

def batch_request(self, items, request_handler_func, **options):
sparse_cache = [None] * len(items)
cache_misses = []

for i in range(0, len(ips)):
ip = ips[i]
cache_key = self.__build_cache_key(ip, options)
for i in range(len(items)):
item = items[i]
cache_key = self.__build_cache_key(item, options)
cache_value = self._cache.get(cache_key)
if cache_value is None:
cache_misses.append(ip)
cache_misses.append(item)
else:
sparse_cache[i] = cache_value

result = [None] * len(ips)
result = [None] * len(items)
if len(cache_misses) > 0:
response = self._requestHandler.batch_lookup_ips(cache_misses, options)
response = request_handler_func(cache_misses, options)
else:
response = ApiResponse(
ApiResponseCredits(),
[],
ApiResponseThrottling()
)

fresh_ip_info = response.data
fresh_item_info = response.data
j = 0
k = 0

for cached_ip_info in sparse_cache:
if cached_ip_info is None:
if not isinstance(fresh_ip_info[k], LookupError):
self._cache.put(self.__build_cache_key(ips[j], options), fresh_ip_info[k])
result[j] = fresh_ip_info[k]
for cached_item_info in sparse_cache:
if cached_item_info is None:
if not isinstance(fresh_item_info[k], LookupError):
self._cache.put(self.__build_cache_key(items[j], options), fresh_item_info[k])
result[j] = fresh_item_info[k]
k += 1
else:
result[j] = cached_ip_info
result[j] = cached_item_info
j += 1

response.data = result
Expand All @@ -80,6 +86,9 @@ def lookup_ip(self, ip='', **options):
def origin_lookup_ip(self, **options):
    """Run a lookup with an empty IP string.

    :param options: extra lookup options, passed on as a dict.
    :return: the result of the private ``__lookup_ip`` helper.
    """
    # NOTE(review): an empty IP presumably makes the API resolve the
    # caller's own address -- confirm against the API documentation.
    origin_ip = ''
    return self.__lookup_ip(origin_ip, options)

def origin_parse_user_agent(self, **options):
    """Ask the request handler to parse the origin user agent.

    The call goes straight to the request handler; no cache lookup is
    done in this method.

    :param options: extra request options, passed on as a dict.
    :return: whatever the request handler returns.
    """
    handler = self._requestHandler
    return handler.origin_parse_user_agent(options)

def __lookup_ip(self, ip, options):
cache_key = self.__build_cache_key(ip, options)
cache_value = self._cache.get(cache_key)
Expand All @@ -96,9 +105,14 @@ def __lookup_ip(self, ip, options):
ApiResponseThrottling()
)

def parse_user_agent(self, user_agent, **options):
    """Parse a single user-agent header value.

    Implemented as a one-element batch request whose ``data`` list is
    then replaced by its only entry.

    :param user_agent: raw user-agent string to parse.
    :param options: extra request options, forwarded unchanged.
    :return: the batch response object, with ``data`` set to the single result.
    """
    batch_response = self.batch_parse_user_agents([user_agent], **options)
    # Unwrap the one-element list so callers get the item itself.
    single_result = batch_response.data[0]
    batch_response.data = single_result
    return batch_response

@staticmethod
def __build_cache_key(ip, options):
result = ip
def __build_cache_key(key, options):
result = key

for key, value in options.items():
if isinstance(value, bool):
Expand Down
6 changes: 4 additions & 2 deletions ipregistry/json.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,8 +267,10 @@ class IpInfo(BaseModel):
class RequesterAutonomousSystem(AutonomousSystem):
    # Marker subclass: it adds no fields or behaviour of its own on top of
    # AutonomousSystem.
    pass


class RequesterIpInfo(IpInfo):
    # IpInfo extended with the requester's parsed user agent.

    # Parsed user agent; optional, defaults to None.
    user_agent: Optional[UserAgent] = None

    # Assigned once (the duplicated assignment was redundant). Per the
    # pydantic documentation, extra='ignore' drops unknown input fields.
    model_config = ConfigDict(extra='ignore')

class RequesterUserAgent(UserAgent):
    # Marker subclass: it adds no fields or behaviour of its own on top of
    # UserAgent.
    pass
57 changes: 54 additions & 3 deletions ipregistry/request.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@

from .__init__ import __version__
from .model import (ApiError, ApiResponse, ApiResponseCredits, ApiResponseThrottling, ClientError, IpInfo,
LookupError, RequesterIpInfo)
LookupError, RequesterIpInfo, RequesterUserAgent, UserAgent)


class IpregistryRequestHandler(ABC):
Expand All @@ -35,6 +35,10 @@ def __init__(self, config):
def batch_lookup_ips(self, ips, options):
pass

@abstractmethod
def batch_parse_user_agents(self, user_agents, options):
    """Parse several user-agent strings; concrete handlers must implement this.

    :param user_agents: the user-agent strings to parse.
    :param options: request options.
    """

@abstractmethod
def lookup_ip(self, ip, options):
    """Look up a single IP address; concrete handlers must implement this.

    :param ip: the IP address to look up.
    :param options: request options.
    """
Expand All @@ -43,8 +47,12 @@ def lookup_ip(self, ip, options):
def origin_lookup_ip(self, options):
pass

def _build_base_url(self, ip, options):
result = self._config.base_url + "/" + ip
@abstractmethod
def origin_parse_user_agent(self, options):
    """Parse the origin user agent; concrete handlers must implement this.

    :param options: request options.
    """

def _build_base_url(self, resource, options):
result = self._config.base_url + "/" + resource

i = 0
for key, value in options.items():
Expand Down Expand Up @@ -80,6 +88,29 @@ def batch_lookup_ips(self, ips, options):
except Exception as e:
raise ClientError(e)

def batch_parse_user_agents(self, user_agents, options):
    """POST the user agents as a JSON array to the ``user_agent`` resource.

    Each entry of the ``results`` array in the JSON reply is converted:
    entries containing a ``code`` key become ``LookupError`` objects, all
    others become ``UserAgent`` models.

    :param user_agents: the user-agent strings to parse.
    :param options: request options used to build the URL.
    :return: the value of ``build_api_response`` for the parsed results.
    :raises ClientError: for any exception other than ``requests.HTTPError``.
    """
    http_response = None
    try:
        http_response = requests.post(
            self._build_base_url('user_agent', options),
            data=json.dumps(user_agents),
            headers=self.__headers(),
            timeout=self._config.timeout
        )
        http_response.raise_for_status()

        parsed_results = []
        for entry in http_response.json().get('results', []):
            if 'code' in entry:
                # Per-item failure reported inside an otherwise successful reply.
                parsed_results.append(LookupError(entry))
            else:
                parsed_results.append(UserAgent(**entry))

        return self.build_api_response(http_response, parsed_results)
    except requests.HTTPError:
        # NOTE(review): __create_api_error is presumably expected to raise;
        # if it returns, this method returns None -- confirm.
        self.__create_api_error(http_response)
    except Exception as e:
        raise ClientError(e)

def lookup_ip(self, ip, options):
response = None
try:
Expand All @@ -104,6 +135,26 @@ def lookup_ip(self, ip, options):
def origin_lookup_ip(self, options):
    """Look up with an empty IP string by delegating to ``lookup_ip``.

    :param options: request options, forwarded unchanged.
    :return: whatever ``lookup_ip`` returns.
    """
    origin_ip = ''
    return self.lookup_ip(origin_ip, options)

def origin_parse_user_agent(self, options):
    """GET the ``user_agent`` resource and wrap the reply.

    The JSON body of the reply is unpacked into a ``RequesterUserAgent``
    model.

    :param options: request options used to build the URL.
    :return: the value of ``build_api_response`` for the parsed model.
    :raises ClientError: for any exception other than ``requests.HTTPError``.
    """
    http_response = None
    try:
        http_response = requests.get(
            self._build_base_url('user_agent', options),
            headers=self.__headers(),
            timeout=self._config.timeout
        )
        http_response.raise_for_status()

        payload = http_response.json()
        requester_user_agent = RequesterUserAgent(**payload)
        return self.build_api_response(http_response, requester_user_agent)
    except requests.HTTPError:
        # NOTE(review): __create_api_error is presumably expected to raise;
        # if it returns, this method returns None -- confirm.
        self.__create_api_error(http_response)
    except Exception as err:
        raise ClientError(err)

@staticmethod
def build_api_response(response, data):
throttling_limit = DefaultRequestHandler.__convert_to_int(response.headers.get('x-rate-limit-limit'))
Expand Down
32 changes: 32 additions & 0 deletions samples/batch-parse-user-agents.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
"""
Copyright 2019 Ipregistry (https://ipregistry.co).
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
https://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

from ipregistry import ApiError, ClientError, IpregistryClient

# Sample: parse two user-agent strings in one batch call and print the
# resulting data, reporting API, client and unexpected errors separately.
try:
    api_key = "tryout"
    client = IpregistryClient(api_key)
    user_agents = [
        'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36',
        'Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Mobile Safari/537.36',
    ]
    response = client.batch_parse_user_agents(user_agents)
    print(response.data)
except ApiError as api_error:
    print("API error", api_error)
except ClientError as client_error:
    print("Client error", client_error)
except Exception as unexpected_error:
    print("Unexpected error", unexpected_error)
29 changes: 29 additions & 0 deletions samples/parse-user-agent.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
"""
Copyright 2019 Ipregistry (https://ipregistry.co).
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
https://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

from ipregistry import ApiError, ClientError, IpregistryClient

# Sample: parse a single user-agent string and print the resulting data,
# reporting API, client and unexpected errors separately.
try:
    api_key = "tryout"
    client = IpregistryClient(api_key)
    response = client.parse_user_agent(
        'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36'
    )
    print(response.data)
except ApiError as api_error:
    print("API error", api_error)
except ClientError as client_error:
    print("Client error", client_error)
except Exception as unexpected_error:
    print("Unexpected error", unexpected_error)
Loading

0 comments on commit 0cd403a

Please sign in to comment.