# Skip to content

# feat: Allow setting metadata for ByteStream when created from file or from string #11458

# feat: Allow setting metadata for ByteStream when created from file or from string

# feat: Allow setting metadata for ByteStream when created from file or from string #11458

# Workflow file for this run

# Keep this name in sync with tests_skipper.yml and ci_metrics.yml:
# if it changes here, change it there as well.
name: Tests

on:
  # Lets the workflow be started manually
  workflow_dispatch:
  push:
    # main, plus release branches (named like v1.9.x)
    branches: [main, "v[0-9].*[0-9].x"]
  pull_request:
    types: [opened, reopened, synchronize, ready_for_review]
    # Only changes to these files trigger the workflow for pull requests
    paths:
      - "haystack/**/*.py"
      - "test/**/*.py"
      - "test/test_requirements.txt"

# Workflow-level environment, available to every job below
env:
  PYTHON_VERSION: "3.8"
  OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
  AZURE_OPENAI_API_KEY: ${{ secrets.AZURE_OPENAI_API_KEY }}
  AZURE_OPENAI_ENDPOINT: ${{ secrets.AZURE_OPENAI_ENDPOINT }}
  CORE_AZURE_CS_ENDPOINT: ${{ secrets.CORE_AZURE_CS_ENDPOINT }}
  CORE_AZURE_CS_API_KEY: ${{ secrets.CORE_AZURE_CS_API_KEY }}

jobs:
# Formatting gate: fails when `black . --check` exits non-zero.
# On main, the outcome is also reported to Datadog.
black:
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v4

    - uses: actions/setup-python@v5
      with:
        python-version: ${{ env.PYTHON_VERSION }}

    - name: Install Black
      # Installs the project with its `dev` extra (presumably where Black comes
      # from). The requirement is quoted so the shell never interprets `[dev]`
      # as a glob character class.
      run: |
        pip install --upgrade pip
        pip install ".[dev]"

    - name: Check status
      # Print the working tree state before failing, to help debugging
      run: |
        if ! black . --check; then
          git status
          exit 1
        fi

    - name: Calculate alert data
      id: calculator
      shell: bash
      # Runs on main only, whether the previous steps succeeded or failed
      if: (success() || failure()) && github.ref_name == 'main'
      run: |
        if [ "${{ job.status }}" = "success" ]; then
          echo "alert_type=success" >> "$GITHUB_OUTPUT";
        else
          echo "alert_type=error" >> "$GITHUB_OUTPUT";
        fi

    - name: Send event to Datadog
      if: (success() || failure()) && github.ref_name == 'main'
      uses: masci/datadog@v1
      with:
        api-key: ${{ secrets.CORE_DATADOG_API_KEY }}
        api-url: https://api.datadoghq.eu
        # The event's alert_type is the output of the `calculator` step
        events: |
          - title: "${{ github.workflow }} workflow"
            text: "Job ${{ github.job }} in branch ${{ github.ref_name }}"
            alert_type: "${{ steps.calculator.outputs.alert_type }}"
            source_type_name: "Github"
            host: ${{ github.repository_owner }}
            tags:
              - "project:${{ github.repository }}"
              - "job:${{ github.job }}"
              - "run_id:${{ github.run_id }}"
              - "workflow:${{ github.workflow }}"
              - "branch:${{ github.ref_name }}"
              - "url:https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}"
# Installs the test requirements once per OS, then caches the directory in
# `env.pythonLocation` under a key made of the runner OS, run id and run attempt.
install-dependencies:
  name: Install and cache ${{ matrix.os }} dependencies
  needs: black
  runs-on: ${{ matrix.os }}
  strategy:
    matrix:
      os:
        - ubuntu-latest
        - macos-latest
        - windows-latest
  steps:
    - uses: actions/checkout@v4
    - uses: actions/setup-python@v5
      with:
        python-version: ${{ env.PYTHON_VERSION }}
    - name: Install Haystack and the dependencies needed for tests
      run: pip install -r test/test_requirements.txt
    - uses: actions/cache@v4
      with:
        key: pip-${{ runner.os }}-${{ github.run_id }}-${{ github.run_attempt }}
        path: ${{ env.pythonLocation }}
# Runs the non-integration tests on every OS in the matrix, using the Python
# dependencies restored from the cache. On main, the outcome is also reported
# to Datadog.
unit-tests:
  name: Unit / ${{ matrix.os }}
  needs: install-dependencies
  strategy:
    # Let the remaining OS jobs finish even if one of them fails
    fail-fast: false
    matrix:
      os:
        - ubuntu-latest
        - windows-latest
        - macos-latest
  runs-on: ${{ matrix.os }}
  steps:
    - uses: actions/checkout@v4

    - uses: actions/setup-python@v5
      with:
        python-version: ${{ env.PYTHON_VERSION }}

    - name: Restore Python dependencies
      uses: actions/cache/restore@v4
      with:
        path: ${{ env.pythonLocation }}
        key: pip-${{ runner.os }}-${{ github.run_id }}-${{ github.run_attempt }}

    - name: Run
      # Writes an XML coverage report for the `haystack` package to coverage.xml
      run: pytest --cov-report xml:coverage.xml --cov="haystack" -m "not integration" test

    - name: Coveralls
      # We upload coverage for ubuntu only, as handling every OS
      # complicates the workflow too much for little to no gain
      if: matrix.os == 'ubuntu-latest'
      uses: coverallsapp/github-action@v2
      with:
        # `file` replaces the deprecated `path-to-lcov` input; the report is
        # an XML file, not LCOV, so the old input name was also misleading.
        file: coverage.xml

    - name: Calculate alert data
      id: calculator
      shell: bash
      # Runs on main only, whether the previous steps succeeded or failed
      if: (success() || failure()) && github.ref_name == 'main'
      run: |
        if [ "${{ job.status }}" = "success" ]; then
          echo "alert_type=success" >> "$GITHUB_OUTPUT";
        else
          echo "alert_type=error" >> "$GITHUB_OUTPUT";
        fi

    - name: Send event to Datadog
      if: (success() || failure()) && github.ref_name == 'main'
      uses: masci/datadog@v1
      with:
        api-key: ${{ secrets.CORE_DATADOG_API_KEY }}
        api-url: https://api.datadoghq.eu
        # The event's alert_type is the output of the `calculator` step
        events: |
          - title: "${{ github.workflow }} workflow"
            text: "Job ${{ github.job }} in branch ${{ github.ref_name }}"
            alert_type: "${{ steps.calculator.outputs.alert_type }}"
            source_type_name: "Github"
            host: ${{ github.repository_owner }}
            tags:
              - "project:${{ github.repository }}"
              - "job:${{ github.job }}"
              - "run_id:${{ github.run_id }}"
              - "workflow:${{ github.workflow }}"
              - "branch:${{ github.ref_name }}"
              - "url:https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}"
# Runs the tests marked `integration` on Ubuntu, with an Apache Tika service
# container published on port 9998. On main, the outcome is also reported to
# Datadog.
integration-tests-linux:
  name: Integration / ubuntu-latest
  needs: unit-tests
  runs-on: ubuntu-latest
  services:
    tika:
      image: apache/tika:2.9.0.0
      ports:
        - "9998:9998"
  steps:
    - uses: actions/checkout@v4

    - uses: actions/setup-python@v5
      with:
        python-version: ${{ env.PYTHON_VERSION }}

    - name: Install dependencies
      # ffmpeg is needed for the local Whisper tests.
      # apt-get with -y is used because the step runs unattended: nobody can
      # answer a confirmation prompt, and `apt` is not intended for scripts.
      run: |
        sudo apt-get update
        sudo apt-get install -y ffmpeg

    - name: Restore Python dependencies
      uses: actions/cache/restore@v4
      with:
        path: ${{ env.pythonLocation }}
        key: pip-${{ runner.os }}-${{ github.run_id }}-${{ github.run_attempt }}

    - name: Run
      # Stop after five failures
      run: pytest --maxfail=5 -m "integration" test

    - name: Calculate alert data
      id: calculator
      shell: bash
      # Runs on main only, whether the previous steps succeeded or failed
      if: (success() || failure()) && github.ref_name == 'main'
      run: |
        if [ "${{ job.status }}" = "success" ]; then
          echo "alert_type=success" >> "$GITHUB_OUTPUT";
        else
          echo "alert_type=error" >> "$GITHUB_OUTPUT";
        fi

    - name: Send event to Datadog
      if: (success() || failure()) && github.ref_name == 'main'
      uses: masci/datadog@v1
      with:
        api-key: ${{ secrets.CORE_DATADOG_API_KEY }}
        api-url: https://api.datadoghq.eu
        # The event's alert_type is the output of the `calculator` step
        events: |
          - title: "${{ github.workflow }} workflow"
            text: "Job ${{ github.job }} in branch ${{ github.ref_name }}"
            alert_type: "${{ steps.calculator.outputs.alert_type }}"
            source_type_name: "Github"
            host: ${{ github.repository_owner }}
            tags:
              - "project:${{ github.repository }}"
              - "job:${{ github.job }}"
              - "run_id:${{ github.run_id }}"
              - "workflow:${{ github.workflow }}"
              - "branch:${{ github.ref_name }}"
              - "url:https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}"
# Runs the tests marked `integration` on macOS, deselecting tests that match
# `tika`. On main, the outcome is also reported to Datadog.
integration-tests-macos:
  name: Integration / macos-latest
  needs: unit-tests
  runs-on: macos-latest
  env:
    # Environment variables are strings; quoting makes the value explicit
    # instead of relying on a YAML boolean being converted.
    HAYSTACK_MPS_ENABLED: "false"
  steps:
    - uses: actions/checkout@v4

    - uses: actions/setup-python@v5
      with:
        python-version: ${{ env.PYTHON_VERSION }}

    - name: Install dependencies
      run: |
        brew install ffmpeg  # for local Whisper tests

    - name: Restore Python dependencies
      uses: actions/cache/restore@v4
      with:
        path: ${{ env.pythonLocation }}
        key: pip-${{ runner.os }}-${{ github.run_id }}-${{ github.run_attempt }}

    - name: Run
      # Stop after five failures
      run: pytest --maxfail=5 -m "integration" test -k 'not tika'

    - name: Calculate alert data
      id: calculator
      shell: bash
      # Runs on main only, whether the previous steps succeeded or failed
      if: (success() || failure()) && github.ref_name == 'main'
      run: |
        if [ "${{ job.status }}" = "success" ]; then
          echo "alert_type=success" >> "$GITHUB_OUTPUT";
        else
          echo "alert_type=error" >> "$GITHUB_OUTPUT";
        fi

    - name: Send event to Datadog
      if: (success() || failure()) && github.ref_name == 'main'
      uses: masci/datadog@v1
      with:
        api-key: ${{ secrets.CORE_DATADOG_API_KEY }}
        api-url: https://api.datadoghq.eu
        # The event's alert_type is the output of the `calculator` step
        events: |
          - title: "${{ github.workflow }} workflow"
            text: "Job ${{ github.job }} in branch ${{ github.ref_name }}"
            alert_type: "${{ steps.calculator.outputs.alert_type }}"
            source_type_name: "Github"
            host: ${{ github.repository_owner }}
            tags:
              - "project:${{ github.repository }}"
              - "job:${{ github.job }}"
              - "run_id:${{ github.run_id }}"
              - "workflow:${{ github.workflow }}"
              - "branch:${{ github.ref_name }}"
              - "url:https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}"
# Windows flavour of the integration suite: tests marked `integration`,
# minus anything matching `tika`. Results on main are sent to Datadog.
integration-tests-windows:
  name: Integration / windows-latest
  runs-on: windows-latest
  needs: unit-tests
  steps:
    - uses: actions/checkout@v4

    - uses: actions/setup-python@v5
      with:
        python-version: ${{ env.PYTHON_VERSION }}

    - name: Restore Python dependencies
      uses: actions/cache/restore@v4
      with:
        key: pip-${{ runner.os }}-${{ github.run_id }}-${{ github.run_attempt }}
        path: ${{ env.pythonLocation }}

    - name: Run
      run: pytest --maxfail=5 -m "integration" test -k 'not tika'

    # Translate the job status into the alert type used by the Datadog event
    - name: Calculate alert data
      if: (success() || failure()) && github.ref_name == 'main'
      id: calculator
      shell: bash
      run: |
        alert_type="error"
        if [ "${{ job.status }}" = "success" ]; then
          alert_type="success"
        fi
        echo "alert_type=${alert_type}" >> "$GITHUB_OUTPUT"

    - name: Send event to Datadog
      if: (success() || failure()) && github.ref_name == 'main'
      uses: masci/datadog@v1
      with:
        api-url: https://api.datadoghq.eu
        api-key: ${{ secrets.CORE_DATADOG_API_KEY }}
        events: |
          - title: "${{ github.workflow }} workflow"
            text: "Job ${{ github.job }} in branch ${{ github.ref_name }}"
            alert_type: "${{ steps.calculator.outputs.alert_type }}"
            source_type_name: "Github"
            host: ${{ github.repository_owner }}
            tags:
              - "project:${{ github.repository }}"
              - "job:${{ github.job }}"
              - "run_id:${{ github.run_id }}"
              - "workflow:${{ github.workflow }}"
              - "branch:${{ github.ref_name }}"
              - "url:https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}"
# Aggregate gate. It only runs once all the integration jobs it depends on
# have succeeded, so this single check can be the one marked as required
# (and skipped accordingly).
catch-all:
  name: Catch-all check
  needs: [integration-tests-linux, integration-tests-macos, integration-tests-windows]
  runs-on: ubuntu-latest
  steps:
    - name: Finisher
      run: echo "Finish him!"