Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Taskrunner intermediate progress #3376

Merged
merged 6 commits into from
Apr 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 38 additions & 0 deletions packages/mira/core/taskrunner.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,12 @@ def __init__(self, description: str):
required=False,
help="The name of the output pipe",
)
parser.add_argument(
"--progress_pipe",
type=str,
required=False,
help="The name of the progress pipe",
)
parser.add_argument(
"--self_destruct_timeout_seconds",
type=int,
Expand All @@ -64,6 +70,8 @@ def __init__(self, description: str):
self.input = args.input
self.input_pipe = args.input_pipe
self.output_pipe = args.output_pipe
self.progress_pipe = args.progress_pipe
self.has_written_output = False

if self.input is None and self.input_pipe is None:
raise ValueError("Either `input` or `input_pipe` must be specified")
Expand Down Expand Up @@ -101,6 +109,27 @@ def read_input() -> dict:
except concurrent.futures.TimeoutError:
raise TimeoutError("Reading from input pipe timed out")

def write_progress_with_timeout(self, progress: dict, timeout_seconds: int):
    """Emit one progress update, giving up after ``timeout_seconds``.

    The update is serialized as compact JSON and written to
    ``self.progress_pipe``. When no progress pipe was configured
    (``self.progress_pipe is None``) the update is printed to stdout instead.

    Args:
        progress: JSON-serializable progress payload.
        timeout_seconds: how long to wait for the pipe write to finish.

    Raises:
        TimeoutError: the write did not complete within ``timeout_seconds``.
    """
    def write_progress(progress_pipe: str, progress: dict):
        # compact separators keep the payload as small as possible
        bs = json.dumps(progress, separators=(',', ':')).encode()
        with open(progress_pipe, 'wb') as f_out:
            f_out.write(bs)
        return

    # if no progress pipe is specified, just print the progress to stdout
    if self.progress_pipe is None:
        self.log("Writing progress to stdout")
        print(json.dumps(progress))
        return

    # The write runs on a worker thread so that the wait can be bounded.
    # NOTE(review): leaving this `with` block calls shutdown(wait=True), so the
    # TimeoutError below only propagates once the worker's open/write returns.
    with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
        future = executor.submit(write_progress, self.progress_pipe, progress)
        try:
            return future.result(timeout=timeout_seconds)
        except concurrent.futures.TimeoutError:
            print('Writing to progress pipe {} timed out'.format(self.progress_pipe), flush=True)
            # name the pipe that actually timed out (was "output pipe")
            raise TimeoutError('Writing to progress pipe timed out')

def write_output_with_timeout(self, output: dict, timeout_seconds: int = 30):
def write_output(output: dict):
self.log("Writing output to output pipe")
Expand All @@ -109,12 +138,21 @@ def write_output(output: dict):
f_out.write(bs)
return

# output should only be written once
if self.has_written_output:
raise ValueError("Output has already been written")

self.has_written_output = True

# if no output pipe is specified, just print the output to stdout
if self.output_pipe is None:
self.log("Writing output to stdout")
print(json.dumps(output))
return

# signal to the taskrunner that it should stop consuming progress
# (`self` is already bound by the method call, so it is not passed again;
# passing it explicitly raised a TypeError for the extra positional argument)
self.write_progress_with_timeout({'done': True}, timeout_seconds)

# otherwise use the output pipe
with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
future = executor.submit(write_output, output)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,16 @@

import java.net.URI;
import java.net.URISyntaxException;
import java.util.Arrays;

import org.springframework.amqp.rabbit.connection.CachingConnectionFactory;
import org.springframework.amqp.rabbit.connection.Connection;
import org.springframework.amqp.rabbit.connection.ConnectionListener;
import org.springframework.amqp.rabbit.core.RabbitAdmin;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.lang.NonNull;

import lombok.extern.slf4j.Slf4j;

Expand All @@ -26,14 +30,32 @@ public class RabbitConfiguration {
@Bean
public RabbitAdmin rabbitAdmin() throws URISyntaxException {

URI rabbitAddress = new URI(rabbitAddresses);
final URI rabbitAddress = new URI(rabbitAddresses);

log.info("Connecting to RabbitMQ: {}", rabbitAddress);

final CachingConnectionFactory connectionFactory = new CachingConnectionFactory();
connectionFactory.setUri(rabbitAddress);
connectionFactory.setUsername(username);
connectionFactory.setPassword(password);

connectionFactory.setConnectionListeners(Arrays.asList(new ConnectionListener() {
@Override
public void onCreate(@NonNull final Connection connection) {
log.info("Successfully created connection to RabbitMQ");
}

@Override
public void onClose(@NonNull final Connection connection) {
log.warn("Connection to RabbitMQ was closed");
}

@Override
public void onFailed(@NonNull final Exception exception) {
log.error("Connection to RabbitMQ failed to connect: ", exception);
}
}));

return new RabbitAdmin(connectionFactory);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;

import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;

import lombok.Data;
Expand All @@ -37,6 +38,7 @@ public class Task {
private Process process;
private CompletableFuture<Integer> processFuture;
private String inputPipeName;
private String progressPipeName;
private String outputPipeName;
private TaskStatus status = TaskStatus.QUEUED;
private ScopedLock lock = new ScopedLock();
Expand All @@ -52,6 +54,7 @@ public Task(final TaskRequest req) throws IOException, InterruptedException {

this.req = req;
inputPipeName = "/tmp/input-" + req.getId();
progressPipeName = "/tmp/progress-" + req.getId();
outputPipeName = "/tmp/output-" + req.getId();

try {
Expand Down Expand Up @@ -88,16 +91,23 @@ private void setup() throws IOException, InterruptedException {
processBuilder = new ProcessBuilder("python", req.getScript(), "--id", req.getId().toString(),
"--input_pipe",
inputPipeName,
"--output_pipe", outputPipeName);
"--output_pipe",
outputPipeName,
"--progress_pipe",
progressPipeName);
} else {
// executable command, execute it directly
processBuilder = new ProcessBuilder(req.getScript(), "--id", req.getId().toString(),
"--input_pipe",
inputPipeName,
"--output_pipe", outputPipeName);
"--output_pipe",
outputPipeName,
"--progress_pipe",
progressPipeName);
}

log.debug("Creating input and output pipes: {} {} for task {}", inputPipeName, outputPipeName, req.getId());
log.info("Creating input, output, and progress pipes: {}, {}, {} for task {}", inputPipeName, outputPipeName,
progressPipeName, req.getId());

// Create the named pipes
final Process inputPipe = new ProcessBuilder("mkfifo", inputPipeName).start();
Expand All @@ -111,19 +121,25 @@ private void setup() throws IOException, InterruptedException {
if (exitCode != 0) {
throw new RuntimeException("Error creating input pipe");
}

final Process progressPipe = new ProcessBuilder("mkfifo", progressPipeName).start();
exitCode = progressPipe.waitFor();
if (exitCode != 0) {
throw new RuntimeException("Error creating input pipe");
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should be `Error creating progress pipe` here.

}
}

public void writeInputWithTimeout(final byte[] bytes, final int timeoutMinutes)
throws IOException, InterruptedException, TimeoutException {
log.debug("Dispatching write thread for input pipe: {} for task: {}", inputPipeName, req.getId());
log.info("Dispatching write thread for input pipe: {} for task: {}", inputPipeName, req.getId());

final CompletableFuture<Void> future = new CompletableFuture<>();
new Thread(() -> {
try {
// Write to the named pipe in a separate thread
log.debug("Opening input pipe: {} for task: {}", inputPipeName, req.getId());
log.info("Opening input pipe: {} for task: {}", inputPipeName, req.getId());
try (FileOutputStream fos = new FileOutputStream(inputPipeName)) {
log.debug("Writing to input pipe: {} for task: {}", inputPipeName, req.getId());
log.info("Writing to input pipe: {} for task: {}", inputPipeName, req.getId());
fos.write(bytes);
}
future.complete(null);
Expand Down Expand Up @@ -158,30 +174,72 @@ public void writeInputWithTimeout(final byte[] bytes, final int timeoutMinutes)

public byte[] readOutputWithTimeout(final int timeoutMinutes)
throws IOException, InterruptedException, ExecutionException, TimeoutException {
log.debug("Dispatching read thread for input pipe: {} for task: {}", outputPipeName, req.getId());
log.info("Dispatching read thread for output pipe: {} for task: {}", outputPipeName, req.getId());

final CompletableFuture<byte[]> future = new CompletableFuture<>();
new Thread(() -> {
log.debug("Opening output pipe: {} for task: {}", outputPipeName, req.getId());
log.info("Opening output pipe: {} for task: {}", outputPipeName, req.getId());
try (BufferedInputStream bis = new BufferedInputStream(new FileInputStream(outputPipeName))) {
log.debug("Reading from output pipe: {} for task: {}", outputPipeName, req.getId());
log.info("Reading from output pipe: {} for task: {}", outputPipeName, req.getId());
final ByteArrayOutputStream bos = new ByteArrayOutputStream();
final byte[] buffer = new byte[BYTES_PER_READ]; // buffer size
int bytesRead;
while ((bytesRead = bis.read(buffer)) != -1) {
log.debug("Read {} bytes from output pipe: {} for task: {}", bytesRead, outputPipeName,
log.info("Read {} bytes from output pipe: {} for task: {}", bytesRead, outputPipeName,
req.getId());
bos.write(buffer, 0, bytesRead);
}
future.complete(bos.toByteArray());
} catch (final IOException e) {
future.completeExceptionally(e);
}
}).start();

Object result;
try {
result = CompletableFuture.anyOf(future, processFuture).get(timeoutMinutes, TimeUnit.MINUTES);
} catch (final TimeoutException e) {
future.cancel(true);
throw new TimeoutException("Reading from pipe took too long for task " + req.getId());
}

try (BufferedReader reader = new BufferedReader(
new InputStreamReader(new FileInputStream(outputPipeName)))) {
log.debug("Reading on output pipe: {} for task {}", outputPipeName, req.getId());
future.complete(reader.readLine().getBytes());
if (result == null) {
throw new RuntimeException("Unexpected null result for task " + req.getId());
}

if (result instanceof byte[]) {
// we got our response
return (byte[]) result;
}
if (result instanceof Integer) {
// process has exited early
if (getStatus() == TaskStatus.CANCELLED) {
throw new InterruptedException("Process for task " + req.getId() + " has been cancelled");
}
throw new InterruptedException("Process for task " + req.getId() + " exited early with code " + result);
}

throw new RuntimeException("Unexpected result type: " + result.getClass());
}

public byte[] readProgressWithTimeout(final int timeoutMinutes)
throws IOException, InterruptedException, ExecutionException, TimeoutException {
log.info("Dispatching read thread for progress pipe: {} for task: {}", progressPipeName, req.getId());

final CompletableFuture<byte[]> future = new CompletableFuture<>();
new Thread(() -> {
log.info("Opening progress pipe: {} for task: {}", progressPipeName, req.getId());
try (BufferedInputStream bis = new BufferedInputStream(new FileInputStream(progressPipeName))) {
log.info("Reading from progress pipe: {} for task: {}", progressPipeName, req.getId());
final ByteArrayOutputStream bos = new ByteArrayOutputStream();
final byte[] buffer = new byte[BYTES_PER_READ]; // buffer size
int bytesRead;
while ((bytesRead = bis.read(buffer)) != -1) {
log.info("Read {} bytes from progress pipe: {} for task: {}", bytesRead, progressPipeName,
req.getId());
bos.write(buffer, 0, bytesRead);
}
future.complete(bos.toByteArray());
} catch (final IOException e) {
future.completeExceptionally(e);
}
Expand All @@ -201,6 +259,15 @@ public byte[] readOutputWithTimeout(final int timeoutMinutes)

if (result instanceof byte[]) {
// we got our response
try {
    final JsonNode progress = mapper.readTree((byte[]) result);
    if (progress.has("done")) {
        // the payload carries a "done" field: finished reading progress
        return null;
    }
} catch (final Exception e) {
    // Best-effort parse: a payload that cannot be inspected for the "done" marker is not
    // treated as an error here; execution continues past this block with the raw bytes.
    // NOTE(review): confirm that non-JSON progress payloads are expected from tasks.
    log.debug("Could not parse progress payload as JSON for task {}", req.getId(), e);
}
Comment on lines +268 to +270
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why catch the exception if we do nothing with it? Is it to avoid stopping the thread?

return (byte[]) result;
}
if (result instanceof Integer) {
Expand All @@ -227,6 +294,12 @@ public void cleanup() {
log.warn("Exception occurred while cleaning up the task output pipe:" + e);
}

try {
Files.deleteIfExists(Paths.get(progressPipeName));
} catch (final Exception e) {
log.warn("Exception occurred while cleaning up the task progress pipe:" + e);
}

try {
cancel();
} catch (final Exception e) {
Expand Down Expand Up @@ -263,7 +336,7 @@ public void start() throws IOException, InterruptedException {
processFuture = new CompletableFuture<>();
new Thread(() -> {
try {
log.debug("Begin waiting for process to exit for task {}");
log.info("Begin waiting for process to exit for task {}", req.getId());
final int exitCode = process.waitFor();
log.info("Process exited with code {} for task {}", exitCode, req.getId());
lock.lock(() -> {
Expand All @@ -277,7 +350,7 @@ public void start() throws IOException, InterruptedException {
status = TaskStatus.SUCCESS;
}
});
log.debug("Finalized process status for task {}", exitCode, req.getId());
// one placeholder per argument: previously the exit code filled the only `{}` and the task id was dropped
log.info("Finalized process status for task {} (exit code {})", req.getId(), exitCode);
processFuture.complete(exitCode);
} catch (final InterruptedException e) {
log.warn("Process failed to exit cleanly for task {}: {}", req.getId(), e);
Expand Down Expand Up @@ -349,7 +422,7 @@ public boolean flagAsCancelling() {
return lock.lock(() -> {
if (status == TaskStatus.QUEUED) {
// if we haven't started yet, flag it as cancelled
log.debug("Cancelled task {} before starting it", req.getId());
log.info("Cancelled task {} before starting it", req.getId());
status = TaskStatus.CANCELLED;
return false;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,20 @@ private void dispatchSingleInputSingleOutputTask(final TaskRequest req)
// write the input to the task
task.writeInputWithTimeout(req.getInput(), req.getTimeoutMinutes());

while (true) {
// block and wait for progress from the task
final byte[] output = task.readProgressWithTimeout(req.getTimeoutMinutes());
if (output == null) {
// no more progress
break;
}

final TaskResponse progressResp = task.createResponse(TaskStatus.RUNNING);
progressResp.setOutput(output);
final String progressJson = mapper.writeValueAsString(progressResp);
rabbitTemplate.convertAndSend(TASK_RUNNER_RESPONSE_EXCHANGE, "", progressJson);
}

// block and wait for output from the task
final byte[] output = task.readOutputWithTimeout(req.getTimeoutMinutes());

Expand Down
Loading
Loading