From c0ff6b5ca388d8366100f6ef4b5740c9b4219d59 Mon Sep 17 00:00:00 2001 From: le0m Date: Wed, 11 Sep 2024 22:48:57 +0200 Subject: [PATCH] refactor: move signal handling to applications, removing init function from module, and use contexts better --- lambda/main.go | 19 ++++++++++++-- plain/main.go | 59 ++++++++++++++++++++++++++++++++++-------- print2pdf/print2pdf.go | 58 +++++++++++++++++++++++------------------ 3 files changed, 98 insertions(+), 38 deletions(-) diff --git a/lambda/main.go b/lambda/main.go index 6e12075..216f71f 100644 --- a/lambda/main.go +++ b/lambda/main.go @@ -5,8 +5,10 @@ import ( "encoding/json" "fmt" "os" + "os/signal" "slices" "strings" + "syscall" "github.com/aws/aws-lambda-go/events" "github.com/aws/aws-lambda-go/lambda" @@ -29,13 +31,26 @@ var BucketName = os.Getenv("BUCKET") // Comma-separated list of allowed hosts for CORS requests. Defaults to "*", meaning all hosts. var CorsAllowedHosts = os.Getenv("CORS_ALLOWED_HOSTS") -func main() { +// Init function checks for required environment variables. +func init() { if BucketName == "" { fmt.Fprintln(os.Stderr, "missing required environment variable BUCKET") os.Exit(1) } +} + +func main() { + ctx, stop := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGTERM) + defer stop() + if err := print2pdf.StartBrowser(ctx); err != nil { + fmt.Fprintf(os.Stderr, "error starting browser: %s\n", err) + os.Exit(1) + } lambda.Start(handler) + + <-ctx.Done() + stop() } // Handle a request. @@ -78,7 +93,7 @@ func handler(ctx context.Context, event events.APIGatewayProxyRequest) (events.A return JsonError(ve.Error(), 400), nil } else if err != nil { - fmt.Fprintf(os.Stderr, "error getting PDF buffer: %s\n", err) + fmt.Fprintf(os.Stderr, "error getting PDF: %s\n", err) return JsonError("internal server error", 500), nil } diff --git a/plain/main.go b/plain/main.go index 1ddd687..8442c3e 100644 --- a/plain/main.go +++ b/plain/main.go @@ -1,14 +1,19 @@ package main import ( + "context" "encoding/json" "errors" "fmt" "io" + "net" "net/http" "os" + "os/signal" "slices" "strings" + "syscall" + "time" "github.com/chialab/print2pdf-go/print2pdf" ) @@ -30,20 +35,48 @@ var Port = os.Getenv("PORT") // Comma-separated list of allowed hosts for CORS requests. Defaults to "*", meaning all hosts. var CorsAllowedHosts = os.Getenv("CORS_ALLOWED_HOSTS") -func main() { +// Init function set default values to environment variables. +func init() { if Port == "" { Port = "3000" } +} + +func main() { + ctx, stop := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGTERM) + defer stop() + if err := print2pdf.StartBrowser(ctx); err != nil { + fmt.Fprintf(os.Stderr, "error starting browser: %s\n", err) + os.Exit(1) + } http.HandleFunc("/status", statusHandler) http.HandleFunc("/v1/print", printV1Handler) http.HandleFunc("/v2/print", printV2Handler) - fmt.Printf("server listening on port %s\n", Port) - err := http.ListenAndServe(":"+Port, nil) - if errors.Is(err, http.ErrServerClosed) { - fmt.Println("server closed") - } else if err != nil { - fmt.Fprintf(os.Stderr, "server error: %s\n", err) + + srv := &http.Server{ + Addr: ":" + Port, + BaseContext: func(_ net.Listener) context.Context { return ctx }, + ReadTimeout: 10 * time.Second, + Handler: http.DefaultServeMux, + } + srvErr := make(chan error, 1) + go func() { + fmt.Printf("server listening on port %s\n", Port) + srvErr <- srv.ListenAndServe() + }() + + select { + case err := <-srvErr: + fmt.Fprintf(os.Stderr, "error starting server: %s\n", err) + os.Exit(1) + case <-ctx.Done(): + stop() + } + + err := srv.Shutdown(context.Background()) + if err != nil { + fmt.Fprintf(os.Stderr, "error closing server: %s\n", err) os.Exit(1) } } @@ -133,9 +166,13 @@ func handlePrintV1Post(w http.ResponseWriter, r *http.Request) { fmt.Fprintf(os.Stderr, "request validation error: %s\n", ve) JsonError(w, ve.Error(), http.StatusBadRequest) + return + } else if errors.Is(r.Context().Err(), context.Canceled) { + fmt.Println("connection closed or request canceled") + return } else if err != nil { - fmt.Fprintf(os.Stderr, "error getting PDF buffer: %s\n", err) + fmt.Fprintf(os.Stderr, "error getting PDF: %s\n", err) JsonError(w, "internal server error", http.StatusInternalServerError) return @@ -173,10 +210,10 @@ func handlePrintV2Post(w http.ResponseWriter, r *http.Request) { if ve, ok := err.(print2pdf.ValidationError); ok { fmt.Fprintf(os.Stderr, "request validation error: %s\n", ve) JsonError(w, ve.Error(), http.StatusBadRequest) - - return + } else if errors.Is(r.Context().Err(), context.Canceled) { + fmt.Println("connection closed or request canceled") } else if err != nil { - fmt.Fprintf(os.Stderr, "error getting PDF buffer: %s\n", err) + fmt.Fprintf(os.Stderr, "error getting PDF: %s\n", err) JsonError(w, "internal server error", http.StatusInternalServerError) return diff --git a/print2pdf/print2pdf.go b/print2pdf/print2pdf.go index bc773b6..b062e81 100644 --- a/print2pdf/print2pdf.go +++ b/print2pdf/print2pdf.go @@ -3,7 +3,9 @@ Package print2pdf provides functions to save a webpage as a PDF file, leveraging Requires the environment variable CHROMIUM_PATH to be set with the full path to the Chromium binary. -This packages uses init function to start an headless instance of Chromium, to reduce startup time when used as a web service. +The StartBrowser() function starts a headless instance of Chromium, to reduce startup time in long running services (like a web server), +and therefore must be called before any call PrintPDF(). These functions can (and probably should) use different contexts: the one passed +to StartBrowser() closes the whole browser when done or cancelled, while the one passed to PrintPDF() closes only the tab it uses. */ package print2pdf @@ -13,10 +15,8 @@ import ( "fmt" "io" "os" - "os/signal" "slices" "strings" - "syscall" "github.com/chromedp/cdproto/emulation" chromedpio "github.com/chromedp/cdproto/io" @@ -150,33 +150,40 @@ var ChromiumPath = os.Getenv("CHROMIUM_PATH") // Reference to browser context, initialized in init function of this package. var browserCtx context.Context -// Allocate a browser to be reused by multiple handler invocations, to reduce startup time. +// Init function checks for required environment variables. func init() { if ChromiumPath == "" { fmt.Fprintln(os.Stderr, "missing required environment variable CHROMIUM_PATH") os.Exit(1) } +} + +// Allocate a browser to be reused by multiple invocations, to reduce startup time. Cancelling the context will close the browser. +// This function must be called before starting to print PDFs. +func StartBrowser(ctx context.Context) error { + if Running() { + return nil + } + defer Elapsed("Browser startup")() opts := append(chromedp.DefaultExecAllocatorOptions[:], chromedp.ExecPath(ChromiumPath)) - allocatorCtx, allocatorCancel := chromedp.NewExecAllocator(context.Background(), opts...) + allocatorCtx, _ := chromedp.NewExecAllocator(ctx, opts...) browserCtx, _ = chromedp.NewContext(allocatorCtx) // Navigate to blank page so that the browser is started. err := chromedp.Run(browserCtx, chromedp.Tasks{chromedp.Navigate("about:blank")}) if err != nil { fmt.Fprintf(os.Stderr, "error initializing browser: %v", err) - os.Exit(1) + + return err } - // Listen for interrupt/sigterm and gracefully close the browser. - ch := make(chan os.Signal, 2) - signal.Notify(ch, os.Interrupt, syscall.SIGTERM) - go func() { - <-ch - fmt.Println("interrupt received, closing browser before exiting...") - allocatorCancel() - os.Exit(0) - }() + return nil + } + +// Check if the browser is still running. +func Running() bool { + return browserCtx != nil && browserCtx.Err() == nil } // Get print format dimensions from string name. @@ -249,13 +256,13 @@ func getPrintParams(data GetPDFParams) (page.PrintToPDFParams, error) { return params, nil } -// Check if the browser is still running. -func Running() bool { - return browserCtx != nil && browserCtx.Err() == nil -} - -// Print a webpage in PDF format and write the result to the input handler. +// Print a webpage in PDF format and write the result to the input handler. Cancelling the context will close the tab. +// StartBrowser() must have been called once before calling this function. func PrintPDF(ctx context.Context, data GetPDFParams, h PDFHandler) (string, error) { + if browserCtx == nil { + return "", fmt.Errorf("must call StartBrowser() before printing a PDF") + } + defer Elapsed("Total time to print PDF")() params, err := getPrintParams(data) @@ -272,14 +279,15 @@ func PrintPDF(ctx context.Context, data GetPDFParams, h PDFHandler) (string, err media = data.Media } - // NOTE: here we're using browserCtx instead of the one for this handler's invocation. - tCtx, cancel := chromedp.NewContext(browserCtx) - defer cancel() + tabCtx, tabCancel := chromedp.NewContext(browserCtx) + defer tabCancel() + // Cancel the tab context (closing the tab) if the passed context is canceled. + context.AfterFunc(ctx, tabCancel) interactiveReached := false idleReached := false res := "" - err = chromedp.Run(tCtx, chromedp.Tasks{ + err = chromedp.Run(tabCtx, chromedp.Tasks{ chromedp.ActionFunc(func(ctx context.Context) error { defer Elapsed(fmt.Sprintf("Navigate to %s", data.Url))()