Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

remove SSH to Controller for job status #43

Merged
merged 7 commits into from
Mar 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 1 addition & 5 deletions .env.example
Original file line number Diff line number Diff line change
@@ -1,8 +1,4 @@
CONTROLLER_HOST=ocrd-controller
CONTROLLER_PORT_SSH=22

MANAGER_KEY=~/.ssh/id_rsa
MANAGER_DATA=~/
MANAGER_HOST=ocrd-manager
MANAGER_PORT_WEB=4004

Expand Down
14 changes: 0 additions & 14 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -28,44 +28,30 @@ Variables:
currently: "$(TAGNAME)"
- MONITOR_PORT_WEB TCP port for the (host-side) web server
currently: $(MONITOR_PORT_WEB)
- MANAGER_KEY SSH key file to mount (for the Controller client)
currently: "$(MANAGER_KEY)"
- MANAGER_DATA host directory to mount into `/data` (shared with Manager)
currently: "$(MANAGER_DATA)"
- MANAGER_WORKFLOWS host directory to mount into `/workflows` (shared with Manager)
currently: "$(MANAGER_WORKFLOWS)"
- NETWORK Docker network to use (manage via "docker network")
currently: $(NETWORK)
- CONTROLLER_HOST network address for the Controller client
(must be reachable from the container network)
currently: $(CONTROLLER_HOST)
- CONTROLLER_PORT_SSH network port for the Controller client
(must be reachable from the container network)
currently: $(CONTROLLER_PORT_SSH)
EOF
endef
export HELP
help: ; @eval "$$HELP"

MANAGER_KEY ?= $(firstword $(filter-out %.pub,$(wildcard $(HOME)/.ssh/id_*)))
MANAGER_DATA ?= $(CURDIR)
MANAGER_WORKFLOWS ?= $(CURDIR)
MONITOR_PORT_WEB ?= 5000
NETWORK ?= bridge
CONTROLLER_HOST ?= $(shell dig +short $$HOSTNAME)
CONTROLLER_PORT_SSH ?= 8022
run: $(DATA)
docker run -d --rm \
-h ocrd_monitor \
--name ocrd_monitor \
--network=$(NETWORK) \
-p $(MONITOR_PORT_WEB):5000 \
-v ${MANAGER_KEY}:/id_rsa \
--mount type=bind,source=$(MANAGER_KEY),target=/id_rsa \
-v $(MANAGER_DATA):/data \
-v $(MANAGER_WORKFLOWS):/workflows \
-v shared:/run/lock/ocrd.jobs \
-e CONTROLLER=$(CONTROLLER_HOST):$(CONTROLLER_PORT_SSH) \
-e MONITOR_PORT_LOG=${MONITOR_PORT_LOG} \
$(TAGNAME)

Expand Down
3 changes: 0 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,8 @@ In order to work properly, the following **environment variables** must be set:

| Variable | Description |
| ------------------- | -------------------------------------------------------------------------------- |
| CONTROLLER_HOST | Hostname of the OCR-D Controller |
| CONTROLLER_PORT_SSH | Port on the OCR-D Controller host that allows a SSH connection |
| MANAGER_DATA | Path to the OCR-D workspaces on the host |
| MANAGER_WORKFLOWS | Path to the OCR-D workflows on the host |
| MANAGER_KEY | Path to a private key that can be used to authenticate with the OCR-D Controller |
| MONITOR_PORT_WEB | The port at which the OCR-D Monitor will be available on the host |
| MONITOR_PORT_LOG | The port at which the Dozzle logs will be available on the host |

Expand Down
2 changes: 0 additions & 2 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ services:
hostname: ${MONITOR_HOST}

environment:
CONTROLLER: "${CONTROLLER_HOST}:${CONTROLLER_PORT_SSH}"
MANAGER_URL: "http://${MANAGER_HOST}:${MANAGER_PORT_WEB}"
MONITOR_PORT_LOG: ${MONITOR_PORT_LOG}
MONITOR_DB_CONNECTION: "mongodb://${MONITOR_DB_ROOT_USER:-root}:${MONITOR_DB_ROOT_PASSWORD:-root_password}@ocrd-database:27017"
Expand All @@ -28,7 +27,6 @@ services:
volumes:
- ${MANAGER_DATA}:/data
- ${MANAGER_WORKFLOWS}:/workflows
- ${MANAGER_KEY}:/id_rsa
- shared:/run/lock/ocrd.jobs

ocrd-logview:
Expand Down
31 changes: 9 additions & 22 deletions init.sh
Original file line number Diff line number Diff line change
@@ -1,31 +1,18 @@
#!/usr/bin/env bash

mkdir -p ~/.ssh
cat /id_rsa >> ~/.ssh/id_rsa
chmod go-rw ~/.ssh/id_rsa

# Add ocrd controller as global and known_hosts if env exist
if [ -n "$CONTROLLER" ]; then
CONTROLLER_HOST=${CONTROLLER%:*}
CONTROLLER_PORT=${CONTROLLER#*:}
CONTROLLER_IP=$(nslookup $CONTROLLER_HOST | grep 'Address\:' | awk 'NR==2 {print $2}')

if test -e /etc/ssh/ssh_known_hosts; then
ssh-keygen -R $CONTROLLER_HOST -f /etc/ssh/ssh_known_hosts
ssh-keygen -R $CONTROLLER_IP -f /etc/ssh/ssh_known_hosts
fi
ssh-keyscan -H -p ${CONTROLLER_PORT:-22} $CONTROLLER_HOST,$CONTROLLER_IP >>/etc/ssh/ssh_known_hosts
fi

export MONITOR_DB_CONNECTION_STRING=$MONITOR_DB_CONNECTION
export OCRD_BROWSER__MODE=native
export OCRD_BROWSER__WORKSPACE_DIR=/data/ocr-d
# all OCR-D workspaces on the Manager are under /data/ocr-d
# but since the Manager resolves everything under /data
# it tracks the workspace directory relative to that in the database
# (e.g. ocr-d/testdata-production)
# so if we write /data/ocr-d, we could list workspaces fine,
# but our workspace URLs from the job database would be wrong
# (resolving as /data/ocr-d/ocr-d/...)
# so better just use /data as well here:
export OCRD_BROWSER__WORKSPACE_DIR=/data
export OCRD_BROWSER__PORT_RANGE="[9000,9100]"
export OCRD_LOGVIEW__PORT=$MONITOR_PORT_LOG
export OCRD_CONTROLLER__HOST=$CONTROLLER_HOST
export OCRD_CONTROLLER__PORT=$CONTROLLER_PORT
export OCRD_CONTROLLER__USER=admin
export OCRD_CONTROLLER__KEYFILE=~/.ssh/id_rsa
export OCRD_MANAGER__URL=$MANAGER_URL

cd /usr/local/ocrd-monitor
Expand Down
2 changes: 1 addition & 1 deletion ocrdmonitor/database/_browserprocessrepository.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def __init__(self, restoring_factory: BrowserRestoringFactory) -> None:
self._restoring_factory = restoring_factory

async def insert(self, browser: OcrdBrowser) -> None:
await BrowserProcess( # type: ignore
await BrowserProcess(
address=browser.address(),
owner=browser.owner(),
process_id=browser.process_id(),
Expand Down
8 changes: 4 additions & 4 deletions ocrdmonitor/database/_initdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,11 @@ async def init(connection_str: str, force_initialize: bool = False) -> None:

__initialized = True
connection_str = rebuild_connection_string(connection_str)
client: AsyncIOMotorClient = AsyncIOMotorClient(connection_str) # type: ignore
client.get_io_loop = asyncio.get_event_loop # type: ignore
client = AsyncIOMotorClient(connection_str) # type: ignore[var-annotated]
client.get_io_loop = asyncio.get_event_loop # type: ignore[method-assign]
await init_beanie(
database=client.ocrd, # type: ignore
document_models=[BrowserProcess, MongoOcrdJob], # type: ignore
database=client.ocrd,
document_models=[BrowserProcess, MongoOcrdJob],
)

return init
Expand Down
3 changes: 1 addition & 2 deletions ocrdmonitor/database/_ocrdjobrepository.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ class MongoOcrdJob(Document):
workdir: Path
remotedir: str
workflow_file: Path
controller_address: str

class Settings:
name = "OcrdJob"
Expand All @@ -36,7 +35,7 @@ class Settings:

class MongoJobRepository:
async def insert(self, job: OcrdJob) -> None:
await MongoOcrdJob(**asdict(job)).insert() # type: ignore
await MongoOcrdJob(**asdict(job)).insert()

async def find_all(self) -> list[OcrdJob]:
return [OcrdJob(**j.dict(exclude={"id"})) for j in await MongoOcrdJob.find_all().to_list()]
6 changes: 1 addition & 5 deletions ocrdmonitor/environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,8 @@
SubProcessOcrdBrowserFactory,
)
from ocrdmonitor import database
from ocrdmonitor.protocols import RemoteServer, Repositories
from ocrdmonitor.protocols import Repositories
from ocrdmonitor.server.settings import Settings
from ocrdmonitor.sshremote import SSHRemote

BrowserType = Type[SubProcessOcrdBrowser] | Type[DockerOcrdBrowser]
CreatingFactories: dict[str, Callable[[set[int]], OcrdBrowserFactory]] = {
Expand Down Expand Up @@ -40,6 +39,3 @@ async def repositories(self) -> Repositories:
def browser_factory(self) -> OcrdBrowserFactory:
port_range_set = set(range(*self.settings.ocrd_browser.port_range))
return CreatingFactories[self.settings.ocrd_browser.mode](port_range_set)

def controller_server(self) -> RemoteServer:
return SSHRemote(self.settings.ocrd_controller)
25 changes: 0 additions & 25 deletions ocrdmonitor/ocrdcontroller.py

This file was deleted.

58 changes: 0 additions & 58 deletions ocrdmonitor/processstatus.py

This file was deleted.

12 changes: 0 additions & 12 deletions ocrdmonitor/protocols.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
from typing import Collection, NamedTuple, Protocol

from ocrdbrowser import OcrdBrowser, OcrdBrowserFactory
from ocrdmonitor.processstatus import ProcessStatus
from ocrdmonitor.server.settings import Settings


Expand Down Expand Up @@ -49,7 +48,6 @@ class OcrdJob:
workdir: Path
remotedir: str
workflow_file: Path
controller_address: str

@property
def is_running(self) -> bool:
Expand All @@ -72,13 +70,6 @@ async def find_all(self) -> list[OcrdJob]:
...


class RemoteServer(Protocol):
    """Structural interface for querying a remote server.

    NOTE(review): implementations appear to reach the host over SSH
    (see SSHRemote elsewhere in this change set) — confirm against callers.
    """

    async def read_file(self, path: str) -> str:
        """Return the contents of the file at *path* on the remote host."""
        ...

    async def process_status(self, process_group: int) -> list[ProcessStatus]:
        """Return status entries for the processes in *process_group*."""
        ...

class Repositories(NamedTuple):
browser_processes: BrowserProcessRepository
ocrd_jobs: JobRepository
Expand All @@ -92,6 +83,3 @@ async def repositories(self) -> Repositories:

def browser_factory(self) -> OcrdBrowserFactory:
...

def controller_server(self) -> RemoteServer:
...
29 changes: 2 additions & 27 deletions ocrdmonitor/server/jobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,20 +8,12 @@
from fastapi.responses import JSONResponse
from fastapi.templating import Jinja2Templates

from ocrdmonitor.ocrdcontroller import OcrdController
from ocrdmonitor.processstatus import ProcessStatus
from ocrdmonitor.protocols import Environment, OcrdJob, Repositories

import httpx
import logging


@dataclass
class RunningJob:
ocrd_job: OcrdJob
process_status: ProcessStatus


def split_into_running_and_completed(
jobs: Iterable[OcrdJob],
) -> tuple[list[OcrdJob], list[OcrdJob]]:
Expand All @@ -30,25 +22,11 @@ def split_into_running_and_completed(
return running_ocrd_jobs, completed_ocrd_jobs


def wrap_in_running_job_type(
    running_ocrd_jobs: Iterable[OcrdJob],
    job_status: Iterable[ProcessStatus | None],
) -> Iterable[RunningJob]:
    """Pair each running OCR-D job with its process status.

    The two iterables are consumed in lockstep; a job whose corresponding
    status is None (i.e. no live process was found for it) is dropped
    from the result.
    """
    return [
        RunningJob(job, status)
        for job, status in zip(running_ocrd_jobs, job_status)
        if status is not None
    ]


def create_jobs(
templates: Jinja2Templates,
environment: Environment,
) -> APIRouter:
router = APIRouter(prefix="/jobs")
controller = OcrdController(environment.controller_server())

@router.get("/", name="jobs")
async def jobs(
Expand All @@ -58,17 +36,14 @@ async def jobs(
jobs = await job_repository.find_all()
running, completed = split_into_running_and_completed(jobs)

job_status = [await controller.status_for(job) for job in running]
running_jobs = wrap_in_running_job_type(running, job_status)

now = datetime.now(timezone.utc)
return templates.TemplateResponse(
"jobs.html.j2",
{
"request": request,
"running_jobs": sorted(
running_jobs,
key=lambda x: x.ocrd_job.time_created or now,
running,
key=lambda x: x.time_created or now,
),
"completed_jobs": sorted(
completed,
Expand Down
Loading
Loading