Skip to content

Commit

Permalink
[MDS-6086] Fixed ssl validation error when running permit pipeline in…
Browse files Browse the repository at this point in the history
… test, enable https for elasticsearch when running locally (#3210)

* [MDS-6086] Added support for async permit condition extraction using celery

* MDS-6086 Tweaks after PR feedback

* MDS-6086 Fixed tests + added github action to run permit service tests

* Added permit service test github action

* Set up env variables before running permit tests

* MDS-6086 Fixed typo in requirements.txt

* Added test api key

* Remove auth requirement in tests

* Add debug folder

* MDS-6086 Moved tests folder

* MDS-6086 Run elasticsearch using https locally

* MDS-6086 Fixed celery setup to accept certs

* Fixed cert job startup issue
  • Loading branch information
simensma-fresh authored Aug 12, 2024
1 parent 6ba3241 commit b57c827
Show file tree
Hide file tree
Showing 10 changed files with 169 additions and 48 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ ms:

permits:
@echo "+\n++ Running Permit Service ...\n+"
@docker compose $(DC_FILE) up -d haystack-api
@docker compose $(DC_FILE) up -d haystack haystack_celery

extra:
@echo "+\n++ Building tertiary services ...\n+"
Expand Down
6 changes: 3 additions & 3 deletions services/permits/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ The Permit Service is a question-answering (QA) service built on top of deepset-
make permits
```

The service should now be running and ready to accept user queries.
The service should now be running and ready to accept user queries. Note: it can take a couple of minutes for all the services to start; Elasticsearch in particular can be slow to spin up.

## Usage

Expand All @@ -56,7 +56,7 @@ This endpoint receives the question as a string and allows the requester to set

Kibana can be accessed at http://localhost:5601/.

If Kibana prompts you to enter an address for elasticsearch, put in http://elasticsearch:9200. No further setup should be necessary in order to use the app.
If Kibana prompts you to enter an address for elasticsearch, put in https://elasticsearch:9200. No further setup should be necessary in order to use the app.

In order to view documents in Kibana:

Expand Down Expand Up @@ -124,5 +124,5 @@ The extracted conditions can be validated against manually extracted conditions
Usage:

```bash
extract_and_validate_pdf.py --pdf_csv_pairs <pdf_path> <expected_csv_path> --pdf_csv_pairs <pdf_path> <expected_csv_path> ...
python -m app.extract_and_validate_pdf --pdf_csv_pairs <pdf_path> <expected_csv_path> --pdf_csv_pairs <pdf_path> <expected_csv_path> ...
```
2 changes: 2 additions & 0 deletions services/permits/app/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,5 +26,7 @@
if DEBUG_MODE:
if not os.path.exists("debug"):
os.makedirs("debug")
if not os.path.exists("app/cache"):
os.makedirs("app/cache")

mds.add_middleware(OpenIdConnectMiddleware)
28 changes: 0 additions & 28 deletions services/permits/app/celery.py

This file was deleted.

18 changes: 18 additions & 0 deletions services/permits/app/celery/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
import logging
import os

import celery

# Module-level logger, named after this package.
logger = logging.getLogger(__name__)

# Redis connection settings for the Celery broker. Each value is read from the
# environment and falls back to the default given here when the variable is
# not set.
CACHE_REDIS_HOST = os.getenv('CACHE_REDIS_HOST', 'redis')
CACHE_REDIS_PORT = os.getenv('CACHE_REDIS_PORT', 6379)
CACHE_REDIS_PASS = os.getenv('CACHE_REDIS_PASS', 'redis-password')
CACHE_REDIS_URL = f'redis://:{CACHE_REDIS_PASS}@{CACHE_REDIS_HOST}:{CACHE_REDIS_PORT}'

# Redis is the message broker; the result backend is referenced by its
# "module:Class" path.
celery_app = celery.Celery(
    __name__,
    broker=CACHE_REDIS_URL,
    backend='app.celery.elasticsearch_backend:MDSElasticSearchBackend',
)

# Tasks are routed to the 'permits' queue unless a task specifies otherwise.
celery_app.conf.task_default_queue = 'permits'
celery_app.autodiscover_tasks(['app.permit_conditions.tasks'])
37 changes: 37 additions & 0 deletions services/permits/app/celery/elasticsearch_backend.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
import os

import elasticsearch
from celery.backends.elasticsearch import ElasticsearchBackend

# Connection settings for the Elasticsearch instance used as the Celery result
# backend. All values are read from the environment once, at import time.
ca_cert = os.environ.get("ELASTICSEARCH_CA_CERT", None)
# An unset *or empty* ELASTICSEARCH_HOST falls back to the default address.
host = os.environ.get("ELASTICSEARCH_HOST", None) or "https://elasticsearch:9200"
username = os.environ.get("ELASTICSEARCH_USERNAME", "")
password = os.environ.get("ELASTICSEARCH_PASSWORD", "")


def build_backend_url(scheme, hostname, username, password):
    """Return the Celery result-backend URL for the given connection parts.

    Args:
        scheme: URL scheme, e.g. ``"https"``.
        hostname: Host and optional port, e.g. ``"elasticsearch:9200"``.
        username: Raw (unencoded) user name; may be empty.
        password: Raw (unencoded) password; may be empty.

    Returns:
        A URL of the form ``scheme://user:pass@hostname/celery``.
    """
    from urllib.parse import quote

    # Percent-encode the credentials so that reserved characters such as
    # '@', ':', '/' or '#' cannot be mistaken for URL delimiters when the URL
    # is parsed again later.
    safe_username = quote(username, safe="")
    safe_password = quote(password, safe="")
    return f'{scheme}://{safe_username}:{safe_password}@{hostname}/celery'


# partition() never fails to unpack, so a malformed host can be reported with
# an explicit message instead of an opaque tuple-unpacking error.
scheme, _scheme_separator, hostname = host.partition('://')
if not (scheme and _scheme_separator and hostname):
    raise ValueError(
        f"ELASTICSEARCH_HOST must be of the form '<scheme>://<host>[:<port>]', got {host!r}"
    )

backend_url = build_backend_url(scheme, hostname, username, password)

class MDSElasticSearchBackend(ElasticsearchBackend):
    """Celery Elasticsearch result backend with ``ca_certs`` support.

    The Elasticsearch client is created with a CA bundle so that connections
    to an instance using a self-signed certificate (which is our setup) can be
    verified.
    """

    def __init__(self, url=None, *args, **kwargs):
        """Initialise the backend.

        The ``url`` argument is stored on the instance, but the parent class
        is always initialised with the module-level ``backend_url`` rather
        than the value passed in.
        """
        self.url = url
        super().__init__(url=backend_url, *args, **kwargs)

    def _get_server(self):
        """Create the Elasticsearch client used by this backend.

        Basic-auth credentials are supplied only when both a username and a
        password are set. Certificate verification is enabled only when the
        module-level ``ca_cert`` is set, in which case it is passed as the
        CA bundle.
        """
        has_credentials = self.username and self.password
        credentials = (self.username, self.password) if has_credentials else None
        endpoint = f'{self.scheme}://{self.host}:{self.port}'

        return elasticsearch.Elasticsearch(
            endpoint,
            retry_on_timeout=self.es_retry_on_timeout,
            max_retries=self.es_max_retries,
            timeout=self.es_timeout,
            http_auth=credentials,
            verify_certs=bool(ca_cert),
            ca_certs=ca_cert or None,
        )
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ def results(task_id: str) -> PermitConditions:
return JSONResponse(status_code=202, content={"detail": f"Task has not completed yet. Current status: {res.status}"})

@router.get("/permit_conditions/results/csv", responses={202: {"model": InProgressJobStatusResponse}})
def results(task_id: str) -> str:
def csv_results(task_id: str) -> str:
"""
Get the results of a permit conditions extraction job in a csv format
Args:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ def run(self, data: ChatData):
"""

if DEBUG_MODE:
with open(f"page_{self.start_page}.json", "w") as f:
with open(f"debug/validator_page_{self.start_page}.json", "w") as f:
f.write(
json.dumps(
PermitConditions(conditions=conditions).model_dump(mode="json"),
Expand Down
2 changes: 1 addition & 1 deletion services/permits/app/permit_search/pipelines/pipelines.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
logger = logging.getLogger(__name__)

ca_cert = os.environ.get("ELASTICSEARCH_CA_CERT", None)
host = os.environ.get("ELASTICSEARCH_HOST", "http://elasticsearch:9200")
host = os.environ.get("ELASTICSEARCH_HOST", "https://elasticsearch:9200")
username = os.environ.get("ELASTICSEARCH_USERNAME", "")
password = os.environ.get("ELASTICSEARCH_PASSWORD", "")

Expand Down
118 changes: 105 additions & 13 deletions services/permits/docker-compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ services:
- ./:/opt/pipelines
- ./:/code
- fileuploads:/file-uploads
- certs:/permitsdata/certs
ports:
- 8004:80
restart: on-failure
Expand All @@ -24,6 +25,12 @@ services:
- ROOT_PATH=/haystack
- FILE_UPLOAD_PATH=/file-uploads
- HAYSTACK_TELEMETRY_ENABLED=False
- ELASTICSEARCH_CA_CERT=/permitsdata/certs/ca/ca.crt
depends_on:
elasticsearch:
condition: service_healthy
redis:
condition: service_healthy
haystack_celery:
container_name: haystack_celery
build:
Expand All @@ -33,6 +40,7 @@ services:
- ./:/opt/pipelines
- ./:/code
- fileuploads:/file-uploads
- certs:/permitsdata/certs
command: >-
python -m watchdog.watchmedo auto-restart
-d app/ -p '*.py' --recursive --
Expand All @@ -44,6 +52,8 @@ services:
- ROOT_PATH=/haystack
- FILE_UPLOAD_PATH=/file-uploads
- HAYSTACK_TELEMETRY_ENABLED=False
- ELASTICSEARCH_CA_CERT=/permitsdata/certs/ca/ca.crt

depends_on:
kibana:
condition: service_healthy
Expand All @@ -53,46 +63,128 @@ services:
condition: service_started
redis:
condition: service_started
create_certs:
tty: true
container_name: create_certs
image: docker.elastic.co/elasticsearch/elasticsearch:8.12.1
command: >
bash -c '
### 1. Create CA and certificates for elasticsearch.
### 2. Wait for elasticsearch to spin up
### 3. Set a password for the `kibana_system` so it can connect to elasticsearch
if [ ! -f /certs/ca.zip ]; then
echo "Creating CA";
bin/elasticsearch-certutil ca --silent --pem -out /certs/ca.zip;
unzip /certs/ca.zip -d /certs;
fi;
if [[ ! -f /certs/bundle.zip ]]; then
cat << EOF >> instances.yml
instances:
- name: elasticsearch
dns:
- elasticsearch
- localhost
ip:
- 127.0.0.1
EOF
bin/elasticsearch-certutil cert --silent --pem -out /certs/bundle.zip --in instances.yml --ca-cert /certs/ca/ca.crt --ca-key /certs/ca/ca.key;
unzip /certs/bundle.zip -d /certs;
fi;
echo "Setting file permissions"
chown -R root:root /certs;
find . -type d -exec chmod 750 \{\} \;;
find . -type f -exec chmod 640 \{\} \;;
echo "Waiting for Elasticsearch availability";
until curl -s --cacert /certs/ca/ca.crt https://elasticsearch:9200 | grep -q "missing authentication credentials"; do sleep 30; done;
echo "Setting kibana_system password";
until curl -s -X POST --cacert /certs/ca/ca.crt -u "${ELASTICSEARCH_USERNAME}:${ELASTICSEARCH_PASSWORD}" -H "Content-Type: application/json" https://elasticsearch:9200/_security/user/kibana_system/_password -d "{\"password\":\"${ELASTICSEARCH_PASSWORD}\"}" | grep -q "^{}"; do sleep 10; done;
echo "All done!";
'
user: "0"
working_dir: /usr/share/elasticsearch
volumes: ['certs:/certs']
healthcheck:
test: ["CMD-SHELL", "[ -f /certs/elasticsearch/elasticsearch.crt ]"]
timeout: 5s
retries: 120

elasticsearch:
image: "docker.elastic.co/elasticsearch/elasticsearch:8.12.1"
container_name: elasticsearch
ports:
- 9200:9200
restart: on-failure
environment:
- ELASTIC_PASSWORD=${ELASTICSEARCH_PASSWORD}
- discovery.type=single-node
- xpack.security.enabled=false
- xpack.security.enabled=true
- "ES_JAVA_OPTS=-Xms1024m -Xmx1024m"
- node.name=elasticsearch
- cluster.name=elasticsearch
- cluster.routing.allocation.disk.threshold_enabled=false
- xpack.security.enabled="true"
- xpack.security.transport.ssl.enabled="true"
- xpack.security.http.ssl.enabled="true"
- xpack.security.transport.ssl.verification_mode=certificate
- xpack.security.transport.ssl.key=/usr/share/elasticsearch/config/certs/elasticsearch/elasticsearch.key
- xpack.security.transport.ssl.certificate=/usr/share/elasticsearch/config/certs/elasticsearch/elasticsearch.crt
- xpack.security.transport.ssl.certificate_authorities=/usr/share/elasticsearch/config/certs/ca/ca.crt
- xpack.security.http.ssl.key=/usr/share/elasticsearch/config/certs/elasticsearch/elasticsearch.key
- xpack.security.http.ssl.certificate=/usr/share/elasticsearch/config/certs/elasticsearch/elasticsearch.crt
- xpack.security.http.ssl.certificate_authorities=/usr/share/elasticsearch/config/certs/ca/ca.crt

volumes:
- esdata01:/usr/share/elasticsearch/data

- certs:/usr/share/elasticsearch/config/certs
healthcheck:
test: curl --fail http://localhost:9200/_cat/health || exit 1
interval: 10s
timeout: 1s
retries: 10
test:
[
"CMD-SHELL",
"curl -s --cacert config/certs/ca/ca.crt https://localhost:9200 | grep -q 'missing authentication credentials'",
]
interval: 10s
timeout: 10s
retries: 120

depends_on:
create_certs:
condition: service_healthy
kibana:
image: "docker.elastic.co/kibana/kibana:8.12.1"
ports:
- 5601:5601
depends_on:
- elasticsearch
elasticsearch:
condition: service_healthy
container_name: kibana

volumes:
- certs:/usr/share/kibana/config/certs
- kibanadata:/usr/share/kibana/data
environment:
- ELASTICSEARCH_HOSTS=http://elasticsearch:9200
- SERVERNAME=kibana
- ELASTICSEARCH_HOSTS=https://elasticsearch:9200
- ELASTICSEARCH_SSL_CERTIFICATEAUTHORITIES=config/certs/ca/ca.crt
- ELASTICSEARCH_USERNAME=kibana_system
- ELASTICSEARCH_PASSWORD=${ELASTICSEARCH_PASSWORD}

healthcheck:
test: curl --fail http://localhost:5601/api/status || exit 1
interval: 10s
timeout: 1s
retries: 10
test:
[
"CMD-SHELL",
"curl -s -I http://localhost:5601 | grep -q 'HTTP/1.1 302 Found'",
]
interval: 10s
timeout: 10s
retries: 120
networks:
default:
driver: "bridge"

volumes:
certs:
fileuploads:
kibanadata:
esdata01:
driver: local

0 comments on commit b57c827

Please sign in to comment.