Skip to content

Commit

Permalink
Merge pull request #115 from kuefmz/main
Browse files Browse the repository at this point in the history
some improvements to get testscases running and fixed
  • Loading branch information
JJ-Author authored Oct 26, 2024
2 parents 5bc7186 + 4f0ff50 commit d1e6044
Show file tree
Hide file tree
Showing 7 changed files with 204 additions and 106 deletions.
32 changes: 27 additions & 5 deletions ontologytimemachine/custom_proxy.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from proxy.http.proxy import HttpProxyBasePlugin
from proxy.http import httpHeaders
import gzip
from io import BytesIO
from proxy.http.parser import HttpParser
from proxy.common.utils import build_http_response
from ontologytimemachine.utils.mock_responses import (
Expand Down Expand Up @@ -40,7 +42,7 @@ def __init__(self, *args, **kwargs):

def before_upstream_connection(self, request: HttpParser) -> HttpParser | None:
# self.client.config = QUOTE_NONE
logger.info("Before upstream connection hook")
logger.info("Before upstream connection hook")
logger.info(f"Request method: {request.method} - Request host: {request.host} - Request path: {request.path} - Request headers: {request.headers}")
wrapped_request = HttpRequestWrapper(request)

Expand All @@ -66,10 +68,13 @@ def before_upstream_connection(self, request: HttpParser) -> HttpParser | None:
config = self.client.config
else:
logger.info("Using the proxy configuration")
config = self.config

config = self.config
if wrapped_request.is_connect_request():
logger.info(f"Handling CONNECT request: configured HTTPS interception mode: {config.httpsInterception}")
# Mark if there is a connect request
if not hasattr(self.client, "mark_connect"):
self.client.mark_connect = True

# Check whether to allow CONNECT requests since they can impose a security risk
if not do_block_CONNECT_request(config):
Expand All @@ -80,20 +85,23 @@ def before_upstream_connection(self, request: HttpParser) -> HttpParser | None:
return None

response = get_response_from_request(wrapped_request, config)
if response:
if response.status_code:
logger.info(response.status_code)
self.queue_response(response)
return None

return request

def do_intercept(self, _request: HttpParser) -> bool:
logger.info('Do intercept hook')
wrapped_request = HttpRequestWrapper(_request)

# Check if any config was provided via the authentication parameters
# If so, use that config
if hasattr(self.client, "config"):
logger.info("Using the configuration from the Auth")
config = self.client.config
logger.info(f'Config: {config}')
else:
logger.info("Using the proxy configuration")
config = self.config
def handle_client_request(self, request: HttpParser) -> HttpParser:
    """Intercept GET/HEAD requests that arrived over a previously marked
    CONNECT tunnel and answer them from the proxy logic instead of
    forwarding upstream.

    Returns the (unmodified) request to let it pass through, or None when
    a response has been queued directly to the client.
    """
    logger.info("Handle client request hook")
    logger.info(f"Request method: {request.method} - Request host: {request.host} - Request path: {request.path} - Request headers: {request.headers}")

    wrapped_request = HttpRequestWrapper(request)
    # Only requests inside an intercepted CONNECT tunnel are handled here;
    # mark_connect is set in before_upstream_connection.
    if (wrapped_request.is_head_request() or wrapped_request.is_get_request()) and hasattr(self.client, "mark_connect"):
        if self.client.mark_connect:
            # Prefer a per-client config injected via proxy authentication,
            # falling back to the proxy-wide configuration.
            if hasattr(self.client, "config"):
                logger.info("Using the configuration from the Auth")
                config = self.client.config
            else:
                logger.info("Using the proxy configuration")
                config = self.config
            response = get_response_from_request(wrapped_request, config)
            # BUGFIX: get_response_from_request can return None (e.g. when the
            # request should pass through, or when request_ontology fails);
            # dereferencing .status_code unguarded raised AttributeError.
            if response is not None and response.status_code:
                logger.info(response.status_code)
                self.queue_response(response)
                return None

    return request

def handle_upstream_chunk(self, chunk: memoryview):
Expand All @@ -140,7 +163,6 @@ def queue_response(self, response):
},
body=response.content,
)

)


Expand Down
4 changes: 2 additions & 2 deletions ontologytimemachine/utils/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ class OntoVersion(EnumValuePrint):
ORIGINAL_FAILOVER_LIVE_LATEST = "originalFailoverLiveLatest"
LATEST_ARCHIVED = "latestArchived"
TIMESTAMP_ARCHIVED = "timestampArchived"
DEPENDENCY_MANIFEST = "dependencyManifest"
#DEPENDENCY_MANIFEST = "dependencyManifest"


class HttpsInterception(EnumValuePrint):
Expand Down Expand Up @@ -91,7 +91,7 @@ class Config:
ontoFormatConf: OntoFormatConfig = field(default_factory=OntoFormatConfig)
ontoVersion: OntoVersion = OntoVersion.ORIGINAL_FAILOVER_LIVE_LATEST
restrictedAccess: bool = False
clientConfigViaProxyAuth: ClientConfigViaProxyAuth = ClientConfigViaProxyAuth.IGNORE
clientConfigViaProxyAuth: ClientConfigViaProxyAuth = ClientConfigViaProxyAuth.REQUIRED
httpsInterception: HttpsInterception = HttpsInterception.ALL
disableRemovingRedirects: bool = False
timestamp: str = ""
Expand Down
58 changes: 32 additions & 26 deletions ontologytimemachine/utils/proxy_logic.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ def do_deny_request_due_non_archivo_ontology_uri(wrapped_request, config):


def get_response_from_request(wrapped_request, config):
logger.info('Get response from request')
do_deny = do_deny_request_due_non_archivo_ontology_uri(wrapped_request, config)
if do_deny:
logger.warning(
Expand All @@ -63,6 +64,7 @@ def get_response_from_request(wrapped_request, config):
# apply for current request
def evaluate_configuration(wrapped_request, config):
authentication_str = wrapped_request.get_authentication_from_request()
logger.info(f'Evaluate configuration, auth str: {authentication_str}')
if authentication_str:
logger.info("Authentication parameters provided, parsing the configuration.")
username, password = authentication_str.split(":")
Expand Down Expand Up @@ -148,18 +150,18 @@ def request_ontology(
allow_redirects = not disableRemovingRedirects
try:
if wrapped_request.is_head_request():
response = requests.head(
url=url, headers=headers, allow_redirects=allow_redirects, timeout=5
)
response = requests.head(url=url, headers=headers, allow_redirects=allow_redirects, timeout=3)
logger.info(response.content)
logger.info(response.status_code)
else:
response = requests.get(
url=url, headers=headers, allow_redirects=allow_redirects, timeout=5
)
response = requests.get(url=url, headers=headers, allow_redirects=allow_redirects, timeout=3)
logger.info(response.content)
logger.info(response.status_code)
logger.info("Successfully fetched ontology")
return response
except Exception as e:
logger.error(f"Error fetching original ontology: {e}")
return mock_response_404()
return None


# change the function definition and pass only the config
Expand Down Expand Up @@ -189,7 +191,7 @@ def proxy_logic(wrapped_request, config):
)
elif config.ontoVersion == OntoVersion.LATEST_ARCHIVED:
logger.info('OntoVersion LATEST_ARCHIVED')
response = fetch_latest_archived(wrapped_request, ontology, headers)
response = fetch_latest_archived(wrapped_request, headers)
elif config.ontoVersion == OntoVersion.TIMESTAMP_ARCHIVED:
logger.info('OntoVersion TIMESTAMP_ARCHIVED')
response = fetch_timestamp_archived(wrapped_request, headers, config)
Expand All @@ -201,10 +203,10 @@ def proxy_logic(wrapped_request, config):


# Fetch from the original source, no matter what
def fetch_original(wrapped_request, ontology, headers, disableRemovingRedirects):
def fetch_original(wrapped_request, ontology, headers, config):
    """Unconditionally fetch the ontology from its original source URL.

    Delegates to request_ontology, honouring the redirect-handling flag
    from the supplied configuration.
    """
    logger.info(f"Fetching original ontology from URL: {ontology}")
    skip_redirect_removal = config.disableRemovingRedirects
    return request_ontology(wrapped_request, ontology, headers, skip_redirect_removal)


Expand All @@ -215,25 +217,29 @@ def fetch_failover(wrapped_request, headers, disableRemovingRedirects):
original_response = request_ontology(
wrapped_request, ontology, headers, disableRemovingRedirects
)
if original_response.status_code in passthrough_status_codes:
requested_mimetypes_with_priority = parse_accept_header_with_priority(
headers["Accept"]
)
requested_mimetypes = [x[0] for x in requested_mimetypes_with_priority]
response_mime_type = original_response.headers.get("Content-Type", ";").split(
";"
)[0]
logger.info(f"Requested mimetypes: {requested_mimetypes}")
logger.info(f"Response mimetype: {response_mime_type}")
if response_mime_type in requested_mimetypes:
return original_response
logger.info(f'Original response: {original_response}')
if original_response:
logger.info('Got an original response')
if original_response.status_code in passthrough_status_codes:
requested_mimetypes_with_priority = parse_accept_header_with_priority(
headers["Accept"]
)
requested_mimetypes = [x[0] for x in requested_mimetypes_with_priority]
response_mime_type = original_response.headers.get("Content-Type", ";").split(
";"
)[0]
logger.info(f"Requested mimetypes: {requested_mimetypes}")
logger.info(f"Response mimetype: {response_mime_type}")
if response_mime_type in requested_mimetypes:
return original_response
else:
logger.info(f"The returned type is not the same as the requested one")
return fetch_latest_archived(wrapped_request, headers)
else:
logger.info(f"The returned type is not the same as the requested one")
logger.info(f"The returned status code is not accepted: {original_response.status_code}")
return fetch_latest_archived(wrapped_request, headers)
else:
logger.info(
f"The returned status code is not accepted: {original_response.status_code}"
)
logger.info("No original response")
return fetch_latest_archived(wrapped_request, headers)


Expand Down
44 changes: 22 additions & 22 deletions tests/archivo_test_IRIs.tsv
Original file line number Diff line number Diff line change
@@ -1,22 +1,22 @@
iri error_dimension expected_error iri_type comment
http://buzzword.org.uk/rdf/personal-link-types# content text/html hash weird html instead of text/turtle
http://data.finlex.fi/schema/sfl/ content slash 0 bytes content-length
http://data.bigdatagrapes.eu/resource/ontology/ dns nxdomain slash
http://data.bigdatagrapes.eu/resource/ontology/MeasurementContext dns nxdomain term
http://data.ontotext.com/resource/leak/ http-code 502 slash
http://data.europa.eu/esco/flow http-code 406 slash
http://bdi.si.ehu.es/bdi/ontologies/ExtruOnt/ExtruOnt transport connect-timeout slash
http://catalogus-professorum.org/cpm/2/ transport connection-refused slash
http://www.w3.org/1999/02/22-rdf-syntax-ns# None hash
http://xmlns.com/foaf/0.1/ None slash
http://xmlns.com/foaf/0.1/Person None term
http://dbpedia.org/ontology/ None term
http://dbpedia.org/ontology/Person None term
https://bag2.basisregistraties.overheid.nl/bag/def/ http-code 404 slash
https://bag2.basisregistraties.overheid.nl/bag/def/Gebruiksdoel http-code 404 term
https://id.parliament.uk/schema http-code 404 slash slash onto without trailing slash /
https://id.parliament.uk/schema/Approval http-code 404 term slash onto without trailing slash /
https://bmake.th-brandenburg.de/spv# http-code 403 hash
https://bmake.th-brandenburg.de/spv http-code 403 hash just test whether Archivo API is used correctly
https://w3id.org/ttla/ transport cert-expired hash
http://data-gov.tw.rpi.edu/2009/data-gov-twc.rdf transport connection-refused hash
enable_testcase iri error_dimension expected_error iri_type comment
1 http://buzzword.org.uk/rdf/personal-link-types# content text/html hash weird html instead of text/turtle
1 http://data.finlex.fi/schema/sfl/ content 0-bytes slash 0 bytes content-length
1 http://data.bigdatagrapes.eu/resource/ontology/ dns nxdomain slash
1 http://data.bigdatagrapes.eu/resource/ontology/MeasurementContext dns nxdomain term
1 http://data.ontotext.com/resource/leak/ http-code 502 slash
1 http://data.europa.eu/esco/flow http-code 406 slash
1 http://bdi.si.ehu.es/bdi/ontologies/ExtruOnt/ExtruOnt transport connect-timeout slash
1 http://catalogus-professorum.org/cpm/2/ transport connection-refused slash
1 http://www.w3.org/1999/02/22-rdf-syntax-ns# None hash
1 http://xmlns.com/foaf/0.1/ None slash
1 http://xmlns.com/foaf/0.1/Person None term
1 http://dbpedia.org/ontology/ None term
1 http://dbpedia.org/ontology/Person None term
0 https://bag2.basisregistraties.overheid.nl/bag/def/ http-code 404 slash
0 https://bag2.basisregistraties.overheid.nl/bag/def/Gebruiksdoel http-code 404 term
0 https://id.parliament.uk/schema http-code 404 slash slash onto without trailing slash /
0 https://id.parliament.uk/schema/Approval http-code 404 term slash onto without trailing slash /
0 https://bmake.th-brandenburg.de/spv# http-code 403 hash
0 https://bmake.th-brandenburg.de/spv http-code 403 hash just test whether Archivo API is used correctly
0 https://w3id.org/ttla/ transport cert-expired hash
1 http://data-gov.tw.rpi.edu/2009/data-gov-twc.rdf transport connection-refused hash
24 changes: 12 additions & 12 deletions tests/non_archivo_test_IRIs.tsv
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
iri error_dimension expected_error iri_type comment
https://data.ontotext.com/resource/leak/ http-code 401 https is not ID
https://www.w3.org/1999/02/22-rdf-syntax-ns# None https is not ID
http://example.org None
https://example.org None
http://1.1.1.1 None
https://1.1.1.1 None
https://data.globalchange.gov/gcis.owl http-code "403 " https is not ID
https://data.ordnancesurvey.co.uk/ontology/geometry/ http-code 404 https is not ID
https://data.ordnancesurvey.co.uk/ontology/ http-code 301 https is not ID
https://google.com None

enable_testcase iri error_dimension expected_error iri_type comment
0 https://data.ontotext.com/resource/leak/ http-code 401 https is not ID
0 https://www.w3.org/1999/02/22-rdf-syntax-ns# None https is not ID
0 http://example.org None
0 https://example.org None
0 http://1.1.1.1 None
0 https://1.1.1.1 None
0 https://data.globalchange.gov/gcis.owl http-code 403 https is not ID
0 https://data.ordnancesurvey.co.uk/ontology/geometry/ http-code 404 https is not ID
0 https://data.ordnancesurvey.co.uk/ontology/ http-code 301 https is not ID
0 https://google.com None
0
File renamed without changes.
Loading

0 comments on commit d1e6044

Please sign in to comment.