Commit

Address feedback + fix merge conflicts
Signed-off-by: Finn Roblin <finnrobl@amazon.com>
finnroblin committed Aug 19, 2024
2 parents acb9e94 + fa1adf2 commit 31dfe45
Showing 31 changed files with 1,941 additions and 804 deletions.
41 changes: 41 additions & 0 deletions .github/workflows/docker-test.yml
@@ -0,0 +1,41 @@
name: Docker Build and Test
on:
  pull_request:
  workflow_dispatch:
    inputs:
      logLevel:
        description: Log level
        required: true
        default: warning
        type: choice
        options:
          - info
          - warning
          - debug

jobs:
  docker:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        platform: ['linux/amd64', 'linux/arm64']
    steps:
      - name: Set up QEMU
        uses: docker/setup-qemu-action@v3

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
        with:
          version: 'v0.9.1'
      - uses: actions/checkout@v4
        with:
          path: 'opensearch-benchmark-git'
      - name: Docker Build ${{ matrix.platform }}
        run: |
          docker buildx version
          cp -a opensearch-benchmark-git/* ./
          echo "Disable VERSION arg to enter docker build test mode"
          PLATFORM=${{ matrix.platform }}
          PLATFORM=`echo $PLATFORM | tr '/' '-'`
          docker buildx build --platform ${{ matrix.platform }} --build-arg BUILD_ENV=testing --build-arg BUILD_DATE=`date -u +%Y-%m-%dT%H:%M:%SZ` -f "docker/Dockerfile" -t "osb/osb-$PLATFORM" -o type=docker .
          docker images | grep "osb/osb-$PLATFORM"
42 changes: 0 additions & 42 deletions .github/workflows/docker.yml

This file was deleted.

13 changes: 13 additions & 0 deletions .github/workflows/manual-integ.yml
@@ -14,6 +14,19 @@ jobs:
      - uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}
      - uses: KengoTODA/actions-setup-docker-compose@v1
        with:
          version: '1.29.2'
      # - name: Enforce docker-compose v1
      #   run: |
      #     echo "GitHub starts to switch runners to include docker-compose v2"
      #     echo "which uses 'docker compose' command to replace 'docker-compose'"
      #     echo "this would cause issues in our test validation so we enforce v1 here"
      #     echo "https://github.com/actions/runner-images/commit/2a4bc14da46f1f8e358aa902a69edb9bef135472"
      #     sudo apt-get remove -y docker-compose-plugin
      #     sudo pip install docker-compose==1.29.2
      #     docker --version
      #     docker-compose --version
      - name: Check out repository code
        uses: actions/checkout@v2
      - name: Clone pyenv
2 changes: 2 additions & 0 deletions DEVELOPER_GUIDE.md
@@ -173,6 +173,8 @@ Integration tests are expected to run for approximately **20-30 mins** and can b
* Amazon Linux 2
* MacOS
Integration tests run against the standard [OpenSearch Benchmark workloads](https://github.com/opensearch-project/opensearch-benchmark-workloads). Sometimes, it may be necessary to run integration tests against a modified forked copy of these workloads. In that case, please follow the instructions [here](https://github.com/opensearch-project/opensearch-benchmark-workloads/blob/main/README.md#testing-the-workload).
Invoke integration tests by running the following command within the root directory of the repository:
```
6 changes: 6 additions & 0 deletions README.md
@@ -1,3 +1,9 @@
[![CI](https://github.com/opensearch-project/opensearch-benchmark/actions/workflows/main.yml/badge.svg)](https://github.com/opensearch-project/opensearch-benchmark/actions/workflows/main.yml)
[![Integration](https://github.com/opensearch-project/opensearch-benchmark/actions/workflows/manual-integ.yml/badge.svg)](https://github.com/opensearch-project/opensearch-benchmark/actions/workflows/manual-integ.yml)
[![Release](https://github.com/opensearch-project/opensearch-benchmark/actions/workflows/release-drafter.yml/badge.svg)](https://github.com/opensearch-project/opensearch-benchmark/actions/workflows/release-drafter.yml)
[![Chat](https://img.shields.io/badge/chat-on%20forums-blue)](https://forum.opensearch.org/categories)
![PRs welcome!](https://img.shields.io/badge/PRs-welcome!-success)

<img src="https://github.com/opensearch-project/opensearch-benchmark/blob/main/opensearch_benchmark.png?raw=true" height="64px" alt="OpenSearch Benchmark">

OpenSearch Benchmark is the macrobenchmarking framework for OpenSearch.
24 changes: 16 additions & 8 deletions docker/Dockerfile
@@ -1,11 +1,19 @@
###############################################################################
# Install OpenSearch Benchmark from PyPI to build a Docker image
###############################################################################
########################################################
# Install OpenSearch Benchmark to build a Docker image #
########################################################

FROM python:3.11.2-slim
ARG VERSION
ARG BUILD_ENV=production

ENV BENCHMARK_RUNNING_IN_DOCKER True
FROM python:3.11.2-slim as build_env_testing
ONBUILD COPY opensearch-benchmark-git/ ./

FROM python:3.11.2-slim as build_env_production
ONBUILD RUN echo Production Environment

FROM build_env_${BUILD_ENV}
WORKDIR /opensearch-benchmark
ENV BENCHMARK_RUNNING_IN_DOCKER=True

RUN apt-get -y update && \
apt-get install -y curl git gcc pbzip2 pigz && \
@@ -15,9 +23,9 @@ RUN apt-get -y update && \
RUN groupadd --gid 1000 opensearch-benchmark && \
useradd -d /opensearch-benchmark -m -k /dev/null -g 1000 -N -u 1000 -l -s /bin/bash benchmark

RUN if [ -z "$VERSION" ] ; then python3 -m pip install opensearch-benchmark ; else python3 -m pip install opensearch-benchmark==$VERSION ; fi

WORKDIR /opensearch-benchmark
ENV PIP_ONLY_BINARY=h5py
RUN if [ "$BUILD_ENV" = "testing" ] ; then echo Testing; ls -l; python3 -m pip install -e . ; \
else echo Production; if [ -z "$VERSION" ] ; then python3 -m pip install opensearch-benchmark ; else python3 -m pip install opensearch-benchmark==$VERSION ; fi; fi

RUN mkdir -p /opensearch-benchmark/.benchmark && \
chown -R 1000:0 /opensearch-benchmark/.benchmark
75 changes: 0 additions & 75 deletions docker/Dockerfile-development

This file was deleted.

37 changes: 0 additions & 37 deletions docker/docker-compose-tests.yml

This file was deleted.

3 changes: 3 additions & 0 deletions osbenchmark/builder/utils/template_renderer.py
@@ -3,6 +3,7 @@

from osbenchmark.exceptions import InvalidSyntax, SystemSetupError
from osbenchmark.utils import io
from osbenchmark.workload import loader


class TemplateRenderer:
@@ -11,6 +12,7 @@ def render_template_file(self, root_path, variables, file_name):

    def _render_template_file(self, root_path, variables, file_name):
        env = jinja2.Environment(loader=jinja2.FileSystemLoader(root_path), autoescape=select_autoescape(['html', 'xml']))
        env.filters["version_between"] = loader.version_between
        template = env.get_template(io.basename(file_name))
        # force a new line at the end. Jinja seems to remove it.
        return template.render(variables) + "\n"
@@ -20,6 +22,7 @@ def render_template_string(self, template_string, variables):

    def _render_template_string(self, template_string, variables):
        env = jinja2.Environment(loader=jinja2.BaseLoader, autoescape=select_autoescape(['html', 'xml']))
        env.filters["version_between"] = loader.version_between
        template = env.from_string(template_string)

        return template.render(variables)
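For context on the template_renderer.py change above: assigning a function to env.filters makes it callable as a Jinja filter inside rendered templates, and registering it on both the file-based and string-based environments keeps the two rendering paths consistent. The sketch below is a minimal illustration of that mechanism only — the filter body, the distribution_version variable, and the template syntax are simplified stand-ins, not the actual osbenchmark.workload.loader.version_between implementation or a documented workload contract.

```python
# Minimal sketch of registering a "version_between"-style Jinja filter,
# mirroring the env.filters["version_between"] assignment in the diff above.
# The filter body below is a naive stand-in for demonstration only.
import jinja2
from jinja2 import select_autoescape


def version_between(version, min_version, max_version):
    # Naive dotted-version comparison; the real helper is more robust.
    def parse(v):
        return tuple(int(part) for part in v.split("."))
    return parse(min_version) <= parse(version) <= parse(max_version)


env = jinja2.Environment(loader=jinja2.BaseLoader(), autoescape=select_autoescape(["html", "xml"]))
env.filters["version_between"] = version_between

template = env.from_string(
    "{% if distribution_version | version_between('2.0.0', '3.0.0') %}supported{% endif %}"
)
print(template.render(distribution_version="2.11.0"))  # -> supported
```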
43 changes: 41 additions & 2 deletions osbenchmark/metrics.py
@@ -1440,7 +1440,6 @@ def as_dict(self):
        if self.plugin_params:
            d["plugin-params"] = self.plugin_params
        return d

    def to_result_dicts(self):
        """
        :return: a list of dicts, suitable for persisting the results of this test execution in a format that is Kibana-friendly.
@@ -1784,6 +1783,7 @@ def __call__(self):
                op_type = task.operation.type
                error_rate = self.error_rate(t, op_type)
                duration = self.duration(t)

                if task.operation.include_in_results_publishing or error_rate > 0:
                    self.logger.debug("Gathering request metrics for [%s].", t)
                    result.add_op_metrics(
@@ -1800,8 +1800,19 @@ def __call__(self):
                            self.workload.meta_data,
                            self.test_procedure.meta_data,
                            task.operation.meta_data,
                            task.meta_data)
                            task.meta_data,
                        ),
                    )

                    result.add_correctness_metrics(
                        t,
                        task.operation.name,
                        self.single_latency(t, op_type, metric_name="recall@k"),
                        self.single_latency(t, op_type, metric_name="recall@1"),
                        error_rate,
                        duration,
                    )

        self.logger.debug("Gathering indexing metrics.")
        result.total_time = self.sum("indexing_total_time")
        result.total_time_per_shard = self.shard_stats("indexing_total_time")
@@ -1996,6 +2007,7 @@ def single_latency(self, task, operation_type, metric_name="latency"):
class GlobalStats:
    def __init__(self, d=None):
        self.op_metrics = self.v(d, "op_metrics", default=[])
        self.correctness_metrics = self.v(d, "correctness_metrics", default=[])
        self.total_time = self.v(d, "total_time")
        self.total_time_per_shard = self.v(d, "total_time_per_shard", default={})
        self.indexing_throttle_time = self.v(d, "indexing_throttle_time")
@@ -2081,6 +2093,22 @@ def op_metrics(op_item, key, single_value=False):
                        "max": item["max"]
                    }
                })
            elif metric == "correctness_metrics":
                for item in value:
                    if "recall@k" in item:
                        all_results.append({
                            "task": item["task"],
                            "operation": item["operation"],
                            "name": "recall@k",
                            "value": item["recall@k"]
                        })
                    if "recall@1" in item:
                        all_results.append({
                            "task": item["task"],
                            "operation": item["operation"],
                            "name": "recall@1",
                            "value": item["recall@1"]
                        })
            elif metric.startswith("total_transform_") and value is not None:
                for item in value:
                    all_results.append({
@@ -2124,6 +2152,17 @@ def add_op_metrics(self, task, operation, throughput, latency, service_time, cli
            doc["meta"] = meta
        self.op_metrics.append(doc)

    def add_correctness_metrics(self, task, operation, recall_at_k_stats, recall_at_1_stats, error_rate, duration):
        self.correctness_metrics.append({
            "task": task,
            "operation": operation,
            "recall@k": recall_at_k_stats,
            "recall@1": recall_at_1_stats,
            "error_rate": error_rate,
            "duration": duration
        })

    def tasks(self):
        # ensure we can read test_execution.json files before Benchmark 0.8.0
        return [v.get("task", v["operation"]) for v in self.op_metrics]
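To make the metrics.py changes above concrete, the sketch below shows the shape of one correctness_metrics entry as built by add_correctness_metrics() and how the new "correctness_metrics" branch of to_result_dicts() flattens it into one result document per recall metric, mirroring how op_metrics statistics are already flattened. The task and operation names and the stats values are invented for illustration; in practice each recall field holds whatever self.single_latency() returns for that metric.

```python
# Minimal sketch of the correctness-metrics flattening added in this commit.
# Sample data is fabricated; real entries come from add_correctness_metrics().
correctness_metrics = [
    {
        "task": "example-knn-task",         # hypothetical task name
        "operation": "example-knn-search",  # hypothetical operation name
        "recall@k": {"mean": 0.93, "min": 0.82, "max": 1.0},
        "recall@1": {"mean": 0.97, "min": 0.90, "max": 1.0},
        "error_rate": 0.0,
        "duration": 120,
    }
]

all_results = []
for item in correctness_metrics:
    for metric_name in ("recall@k", "recall@1"):
        if metric_name in item:
            # One flattened document per recall metric, as in to_result_dicts().
            all_results.append({
                "task": item["task"],
                "operation": item["operation"],
                "name": metric_name,
                "value": item[metric_name],
            })

print(all_results)  # two documents: one for recall@k, one for recall@1
```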