From 26c6a6e8dc8a3d5c56965b73e3a4e972ed963c9d Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Sun, 1 Dec 2024 22:17:39 -0500 Subject: [PATCH 01/17] initial port of multipart-form-data --- src/multipart_form/dune | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 src/multipart_form/dune diff --git a/src/multipart_form/dune b/src/multipart_form/dune new file mode 100644 index 00000000..6ce7f437 --- /dev/null +++ b/src/multipart_form/dune @@ -0,0 +1,5 @@ +(library + (name tiny_httpd_multipart_form_data) + (public_name tiny_httpd.multipart-form-data) + (synopsis "Port of multipart-form-data for tiny_httpd") + (libraries iostream stringext)) From 2413a3028c53bdd87be7fa033ec9927a4e109a43 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Mon, 2 Dec 2024 00:23:43 -0500 Subject: [PATCH 02/17] wip --- .../tiny_httpd_multipart_form_data.ml | 337 ++++++++++++++++++ .../tiny_httpd_multipart_form_data.mli | 33 ++ src/multipart_form/utils_.ml | 62 ++++ 3 files changed, 432 insertions(+) create mode 100644 src/multipart_form/tiny_httpd_multipart_form_data.ml create mode 100644 src/multipart_form/tiny_httpd_multipart_form_data.mli create mode 100644 src/multipart_form/utils_.ml diff --git a/src/multipart_form/tiny_httpd_multipart_form_data.ml b/src/multipart_form/tiny_httpd_multipart_form_data.ml new file mode 100644 index 00000000..ce753566 --- /dev/null +++ b/src/multipart_form/tiny_httpd_multipart_form_data.ml @@ -0,0 +1,337 @@ +(* ported from https://github.com/cryptosense/multipart-form-data . + License: BSD-2 *) + +open Utils_ + +(* TODO: redo some light form of lwt stream for porting purposes? *) +module Stream_ = struct + type 'a t = { next: unit -> 'a option } [@@unboxed] + + let from next : _ t = { next } + let[@inline] get (self : _ t) : _ option = self.next () + + let filter_map f (self : _ t) : _ t = + let rec next () = + match self.next () with + | None -> None + | Some x -> + (match f x with + | None -> next () + | Some _ as r -> r) + in + { next } +end + +let split s boundary = + let r = ref None in + let push v = + match !r with + | None -> r := Some v + | Some _ -> assert false + in + let pop () = + let res = !r in + r := None; + res + in + let go c0 = + let c = + match pop () with + | Some x -> x ^ c0 + | None -> c0 + in + let string_to_process = + match find_common_idx c boundary with + | None -> c + | Some idx -> + let prefix = String.sub c 0 idx in + let suffix = String.sub c idx (String.length c - idx) in + push suffix; + prefix + in + split_and_process_string ~boundary string_to_process + in + let initial = List.map go s in + let final = + Stream_.flatten + @@ Stream_.from (fun () -> + option_map (split_and_process_string ~boundary) @@ pop ()) + in + Stream_.append initial final + +let until_next_delim s = + Stream_.from @@ fun () -> + let res = Stream_.get s in + match res with + | None | Some `Delim -> None + | Some (`Word w) -> Some w + +let join s = + Stream_.filter_map + (function + (* | `Delim -> Some (until_next_delim @@ Lwt_stream.clone s) *) + | `Delim -> Some (until_next_delim s) + | `Word _ -> None) + s + +let align stream boundary = join @@ split stream boundary + +type header = string * string + +let extract_boundary content_type = + Stringext.chop_prefix ~prefix:"multipart/form-data; boundary=" content_type + +let unquote s = Scanf.sscanf s "%S" @@ fun x -> x + +let parse_name s = + option_map unquote @@ Stringext.chop_prefix ~prefix:"form-data; name=" s + +let parse_header s = + match Stringext.cut ~on:": " s with + | Some (key, value) -> key, value + | None -> invalid_arg "parse_header" + +let non_empty st = + let%lwt r = Lwt_stream.to_list @@ Lwt_stream.clone st in + Lwt.return (String.concat "" r <> "") + +let get_headers : string Lwt_stream.t Lwt_stream.t -> header list Lwt.t = + fun lines -> + let%lwt header_lines = Lwt_stream.get_while_s non_empty lines in + Lwt_list.map_s + (fun header_line_stream -> + let%lwt parts = Lwt_stream.to_list header_line_stream in + Lwt.return @@ parse_header @@ String.concat "" parts) + header_lines + +type stream_part = { headers: header list; body: string Lwt_stream.t } + +let parse_part chunk_stream = + let lines = align chunk_stream "\r\n" in + match%lwt get_headers lines with + | [] -> Lwt.return_none + | headers -> + let body = Lwt_stream.concat @@ Lwt_stream.clone lines in + Lwt.return_some { headers; body } + +let parse_stream ~stream ~content_type = + match extract_boundary content_type with + | None -> Lwt.fail_with "Cannot parse content-type" + | Some boundary -> + let actual_boundary = "--" ^ boundary in + Lwt.return + @@ Lwt_stream.filter_map_s parse_part + @@ align stream actual_boundary + +let s_part_body { body; _ } = body + +let s_part_name { headers; _ } = + match parse_name @@ List.assoc "Content-Disposition" headers with + | Some x -> x + | None -> invalid_arg "s_part_name" + +let parse_filename s = + let parts = split_on_string s ~pattern:"; " in + let f = function + | None -> None + | Some part -> + (match Stringext.cut part ~on:"=" with + | Some ("filename", quoted_string) -> Some (unquote quoted_string) + | _ -> None) + in + first_matching f parts + +let s_part_filename { headers; _ } = + parse_filename @@ List.assoc "Content-Disposition" headers + +type file = stream_part + +let file_stream = s_part_body +let file_name = s_part_name +let file_content_type { headers; _ } = List.assoc "Content-Type" headers + +let as_part part = + match s_part_filename part with + | Some _filename -> Lwt.return (`File part) + | None -> + let%lwt chunks = Lwt_stream.to_list part.body in + let body = String.concat "" chunks in + Lwt.return (`String body) + +let get_parts s = + let go part m = + let name = s_part_name part in + let%lwt parsed_part = as_part part in + Lwt.return @@ StringMap.add name parsed_part m + in + Lwt_stream.fold_s go s StringMap.empty + +let concat a b = + match a, b with + | _, "" -> a + | "", _ -> b + | _ -> a ^ b + +module Reader = struct + type t = { mutable buffer: string; source: string Lwt_stream.t } + + let make stream = { buffer = ""; source = stream } + let unread r s = r.buffer <- concat s r.buffer + + let empty r = + if r.buffer = "" then + Lwt_stream.is_empty r.source + else + Lwt.return false + + let read_next r = + let%lwt next_chunk = Lwt_stream.next r.source in + r.buffer <- concat r.buffer next_chunk; + Lwt.return_unit + + let read_chunk r = + try%lwt + let%lwt () = + if r.buffer = "" then + read_next r + else + Lwt.return_unit + in + let res = r.buffer in + r.buffer <- ""; + Lwt.return (Some res) + with Lwt_stream.Empty -> Lwt.return None + + let buffer_contains r s = + match Stringext.cut r.buffer ~on:s with + | Some _ -> true + | None -> false + + let rec read_until r cond = + if cond () then + Lwt.return_unit + else ( + let%lwt () = read_next r in + read_until r cond + ) + + let read_line r = + let delim = "\r\n" in + let%lwt () = read_until r (fun () -> buffer_contains r delim) in + match Stringext.cut r.buffer ~on:delim with + | None -> assert false + | Some (line, next) -> + r.buffer <- next; + Lwt.return (line ^ delim) +end + +let read_headers reader = + let rec go headers = + let%lwt line = Reader.read_line reader in + if line = "\r\n" then + Lwt.return headers + else ( + let header = parse_header line in + go (header :: headers) + ) + in + go [] + +let rec compute_case reader boundary = + match%lwt Reader.read_chunk reader with + | None -> Lwt.return `Empty + | Some line -> + (match Stringext.cut line ~on:(boundary ^ "\r\n") with + | Some (pre, post) -> Lwt.return @@ `Boundary (pre, post) + | None -> + (match Stringext.cut line ~on:(boundary ^ "--\r\n") with + | Some (pre, post) -> Lwt.return @@ `Boundary (pre, post) + | None -> + (match find_common_idx line boundary with + | Some 0 -> + Reader.unread reader line; + let%lwt () = Reader.read_next reader in + compute_case reader boundary + | Some amb_idx -> + let unambiguous = String.sub line 0 amb_idx in + let ambiguous = + String.sub line amb_idx (String.length line - amb_idx) + in + Lwt.return @@ `May_end_with_boundary (unambiguous, ambiguous) + | None -> Lwt.return @@ `App_data line))) + +let iter_part reader boundary callback = + let fin = ref false in + let last () = + fin := true; + Lwt.return_unit + in + let handle ~send ~unread ~finish = + let%lwt () = callback send in + Reader.unread reader unread; + if finish then + last () + else + Lwt.return_unit + in + while%lwt not !fin do + let%lwt res = compute_case reader boundary in + match res with + | `Empty -> last () + | `Boundary (pre, post) -> handle ~send:pre ~unread:post ~finish:true + | `May_end_with_boundary (unambiguous, ambiguous) -> + handle ~send:unambiguous ~unread:ambiguous ~finish:false + | `App_data line -> callback line + done + +let read_file_part reader boundary callback = iter_part reader boundary callback + +let strip_crlf s = + if ends_with ~suffix:"\r\n" ~suffix_length:2 s then + String.sub s 0 (String.length s - 2) + else + s + +let read_string_part reader boundary = + let value = Buffer.create 0 in + let append_to_value line = Lwt.return (Buffer.add_string value line) in + let%lwt () = iter_part reader boundary append_to_value in + Lwt.return @@ strip_crlf (Buffer.contents value) + +let read_part reader boundary callback fields = + let%lwt headers = read_headers reader in + let content_disposition = List.assoc "Content-Disposition" headers in + let name = + match parse_name content_disposition with + | Some x -> x + | None -> invalid_arg "handle_multipart" + in + match parse_filename content_disposition with + | Some filename -> read_file_part reader boundary (callback ~name ~filename) + | None -> + let%lwt value = read_string_part reader boundary in + fields := (name, value) :: !fields; + Lwt.return_unit + +let handle_multipart reader boundary callback = + let fields = (ref [] : (string * string) list ref) in + let%lwt () = + let%lwt _dummyline = Reader.read_line reader in + let fin = ref false in + while%lwt not !fin do + if%lwt Reader.empty reader then + Lwt.return (fin := true) + else + read_part reader boundary callback fields + done + in + Lwt.return !fields + +let parse ~stream ~content_type ~callback = + let reader = Reader.make stream in + let boundary = + match extract_boundary content_type with + | Some s -> "--" ^ s + | None -> invalid_arg "iter_multipart" + in + handle_multipart reader boundary callback diff --git a/src/multipart_form/tiny_httpd_multipart_form_data.mli b/src/multipart_form/tiny_httpd_multipart_form_data.mli new file mode 100644 index 00000000..557324e7 --- /dev/null +++ b/src/multipart_form/tiny_httpd_multipart_form_data.mli @@ -0,0 +1,33 @@ +val align : string Lwt_stream.t -> string -> string Lwt_stream.t Lwt_stream.t +(** + Align a stream on a particular sequence and remove these boundaries. + *) + +type stream_part + +val s_part_name : stream_part -> string +val s_part_body : stream_part -> string Lwt_stream.t +val s_part_filename : stream_part -> string option + +val parse_stream : + stream:string Lwt_stream.t -> + content_type:string -> + stream_part Lwt_stream.t Lwt.t + +type file + +val file_name : file -> string +val file_content_type : file -> string +val file_stream : file -> string Lwt_stream.t + +module StringMap : Map.S with type key = string + +val get_parts : + stream_part Lwt_stream.t -> + [ `String of string | `File of file ] StringMap.t Lwt.t + +val parse : + stream:string Lwt_stream.t -> + content_type:string -> + callback:(name:string -> filename:string -> string -> unit Lwt.t) -> + (string * string) list Lwt.t diff --git a/src/multipart_form/utils_.ml b/src/multipart_form/utils_.ml new file mode 100644 index 00000000..4d1c5f56 --- /dev/null +++ b/src/multipart_form/utils_.ml @@ -0,0 +1,62 @@ +module StringMap = Map.Make (String) + +let string_eq ~a ~a_start ~b ~len = + let r = ref true in + for i = 0 to len - 1 do + let a_i = a_start + i in + let b_i = i in + if a.[a_i] <> b.[b_i] then r := false + done; + !r + +let ends_with ~suffix ~suffix_length s = + let s_length = String.length s in + s_length >= suffix_length + && string_eq ~a:s ~a_start:(s_length - suffix_length) ~b:suffix + ~len:suffix_length + +let rec first_matching p = function + | [] -> None + | x :: xs -> + (match p x with + | Some y -> Some y + | None -> first_matching p xs) + +let[@inline] option_map f = function + | None -> None + | Some x -> Some (f x) + +let find_common_idx a b = + let rec go i = + if i <= 0 then + None + else if ends_with ~suffix:b ~suffix_length:i a then + Some (String.length a - i) + else + go (i - 1) + in + go (String.length b) + +let[@inline] word = function + | "" -> [] + | w -> [ Some w ] + +let split_on_string ~pattern s = + let pattern_length = String.length pattern in + let rec go start acc = + match Stringext.find_from ~start s ~pattern with + | Some match_start -> + let before = String.sub s start (match_start - start) in + let new_acc = (None :: word before) @ acc in + let new_start = match_start + pattern_length in + go new_start new_acc + | None -> word (Stringext.string_after s start) @ acc + in + List.rev (go 0 []) + +let split_and_process_string ~boundary s = + let f = function + | None -> `Delim + | Some w -> `Word w + in + List.map f @@ split_on_string ~pattern:boundary s From 2968031e5beab7b2dbf2a9e5f3dd0a77d70212e3 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Mon, 2 Dec 2024 11:46:40 -0500 Subject: [PATCH 03/17] wip: multipart --- src/multipart_form/dune | 2 +- .../tiny_httpd_multipart_form_data.ml | 432 ++++-------------- .../tiny_httpd_multipart_form_data.mli | 38 +- src/multipart_form/utils_.ml | 25 +- tests/multipart_form/dune | 4 + tests/multipart_form/t1.expected | 0 tests/multipart_form/t1.ml | 45 ++ 7 files changed, 172 insertions(+), 374 deletions(-) create mode 100644 tests/multipart_form/dune create mode 100644 tests/multipart_form/t1.expected create mode 100644 tests/multipart_form/t1.ml diff --git a/src/multipart_form/dune b/src/multipart_form/dune index 6ce7f437..6adfd691 100644 --- a/src/multipart_form/dune +++ b/src/multipart_form/dune @@ -2,4 +2,4 @@ (name tiny_httpd_multipart_form_data) (public_name tiny_httpd.multipart-form-data) (synopsis "Port of multipart-form-data for tiny_httpd") - (libraries iostream stringext)) + (libraries iostream tiny_httpd)) diff --git a/src/multipart_form/tiny_httpd_multipart_form_data.ml b/src/multipart_form/tiny_httpd_multipart_form_data.ml index ce753566..ff2d3050 100644 --- a/src/multipart_form/tiny_httpd_multipart_form_data.ml +++ b/src/multipart_form/tiny_httpd_multipart_form_data.ml @@ -1,337 +1,101 @@ -(* ported from https://github.com/cryptosense/multipart-form-data . - License: BSD-2 *) - -open Utils_ - -(* TODO: redo some light form of lwt stream for porting purposes? *) -module Stream_ = struct - type 'a t = { next: unit -> 'a option } [@@unboxed] - - let from next : _ t = { next } - let[@inline] get (self : _ t) : _ option = self.next () - - let filter_map f (self : _ t) : _ t = - let rec next () = - match self.next () with - | None -> None - | Some x -> - (match f x with - | None -> next () - | Some _ as r -> r) - in - { next } -end - -let split s boundary = - let r = ref None in - let push v = - match !r with - | None -> r := Some v - | Some _ -> assert false - in - let pop () = - let res = !r in - r := None; - res - in - let go c0 = - let c = - match pop () with - | Some x -> x ^ c0 - | None -> c0 - in - let string_to_process = - match find_common_idx c boundary with - | None -> c - | Some idx -> - let prefix = String.sub c 0 idx in - let suffix = String.sub c idx (String.length c - idx) in - push suffix; - prefix +(* ported from https://github.com/cryptosense/multipart-form-data . *) + +open Tiny_httpd + +type st = { + delim: string; + ic: Iostream.In.t; + buf_split: bytes; (** Used to split on the delimiter *) + mutable buf_len: int; + buf_line: Buf.t; + mutable eof: bool; +} + +let create ?(buf_size = 64 * 1024) ~delim ic : st = + let ic = (ic : #Iostream.In.t :> Iostream.In.t) in + { + delim; + ic; + buf_split = Bytes.create buf_size; + buf_len = 0; + buf_line = Buf.create ~size:256 (); + eof = false; + } + +type chunk = Delim | Eof | Read of int + +let[@inline] min_len_ (self : st) : int = 2 + String.length self.delim + +let shift_left_ (self : st) n = + if n = self.buf_len then + self.buf_len <- 0 + else ( + assert (n < self.buf_len); + Bytes.blit self.buf_split n self.buf_split 0 (self.buf_len - n); + self.buf_len <- self.buf_len - n + ) + +exception Found_delim of int + +let rec read_chunk_ (self : st) buf i_buf len : chunk = + if self.eof then + Eof + else if self.buf_len < min_len_ self then ( + (* try to refill buffer *) + let n = + Iostream.In.input self.ic self.buf_split self.buf_len + (Bytes.length self.buf_split - self.buf_len) in - split_and_process_string ~boundary string_to_process - in - let initial = List.map go s in - let final = - Stream_.flatten - @@ Stream_.from (fun () -> - option_map (split_and_process_string ~boundary) @@ pop ()) - in - Stream_.append initial final - -let until_next_delim s = - Stream_.from @@ fun () -> - let res = Stream_.get s in - match res with - | None | Some `Delim -> None - | Some (`Word w) -> Some w - -let join s = - Stream_.filter_map - (function - (* | `Delim -> Some (until_next_delim @@ Lwt_stream.clone s) *) - | `Delim -> Some (until_next_delim s) - | `Word _ -> None) - s - -let align stream boundary = join @@ split stream boundary - -type header = string * string - -let extract_boundary content_type = - Stringext.chop_prefix ~prefix:"multipart/form-data; boundary=" content_type - -let unquote s = Scanf.sscanf s "%S" @@ fun x -> x - -let parse_name s = - option_map unquote @@ Stringext.chop_prefix ~prefix:"form-data; name=" s - -let parse_header s = - match Stringext.cut ~on:": " s with - | Some (key, value) -> key, value - | None -> invalid_arg "parse_header" - -let non_empty st = - let%lwt r = Lwt_stream.to_list @@ Lwt_stream.clone st in - Lwt.return (String.concat "" r <> "") - -let get_headers : string Lwt_stream.t Lwt_stream.t -> header list Lwt.t = - fun lines -> - let%lwt header_lines = Lwt_stream.get_while_s non_empty lines in - Lwt_list.map_s - (fun header_line_stream -> - let%lwt parts = Lwt_stream.to_list header_line_stream in - Lwt.return @@ parse_header @@ String.concat "" parts) - header_lines - -type stream_part = { headers: header list; body: string Lwt_stream.t } - -let parse_part chunk_stream = - let lines = align chunk_stream "\r\n" in - match%lwt get_headers lines with - | [] -> Lwt.return_none - | headers -> - let body = Lwt_stream.concat @@ Lwt_stream.clone lines in - Lwt.return_some { headers; body } - -let parse_stream ~stream ~content_type = - match extract_boundary content_type with - | None -> Lwt.fail_with "Cannot parse content-type" - | Some boundary -> - let actual_boundary = "--" ^ boundary in - Lwt.return - @@ Lwt_stream.filter_map_s parse_part - @@ align stream actual_boundary - -let s_part_body { body; _ } = body - -let s_part_name { headers; _ } = - match parse_name @@ List.assoc "Content-Disposition" headers with - | Some x -> x - | None -> invalid_arg "s_part_name" - -let parse_filename s = - let parts = split_on_string s ~pattern:"; " in - let f = function - | None -> None - | Some part -> - (match Stringext.cut part ~on:"=" with - | Some ("filename", quoted_string) -> Some (unquote quoted_string) - | _ -> None) - in - first_matching f parts - -let s_part_filename { headers; _ } = - parse_filename @@ List.assoc "Content-Disposition" headers - -type file = stream_part - -let file_stream = s_part_body -let file_name = s_part_name -let file_content_type { headers; _ } = List.assoc "Content-Type" headers - -let as_part part = - match s_part_filename part with - | Some _filename -> Lwt.return (`File part) - | None -> - let%lwt chunks = Lwt_stream.to_list part.body in - let body = String.concat "" chunks in - Lwt.return (`String body) - -let get_parts s = - let go part m = - let name = s_part_name part in - let%lwt parsed_part = as_part part in - Lwt.return @@ StringMap.add name parsed_part m - in - Lwt_stream.fold_s go s StringMap.empty - -let concat a b = - match a, b with - | _, "" -> a - | "", _ -> b - | _ -> a ^ b - -module Reader = struct - type t = { mutable buffer: string; source: string Lwt_stream.t } - - let make stream = { buffer = ""; source = stream } - let unread r s = r.buffer <- concat s r.buffer - - let empty r = - if r.buffer = "" then - Lwt_stream.is_empty r.source - else - Lwt.return false - - let read_next r = - let%lwt next_chunk = Lwt_stream.next r.source in - r.buffer <- concat r.buffer next_chunk; - Lwt.return_unit - - let read_chunk r = - try%lwt - let%lwt () = - if r.buffer = "" then - read_next r - else - Lwt.return_unit - in - let res = r.buffer in - r.buffer <- ""; - Lwt.return (Some res) - with Lwt_stream.Empty -> Lwt.return None - - let buffer_contains r s = - match Stringext.cut r.buffer ~on:s with - | Some _ -> true - | None -> false - - let rec read_until r cond = - if cond () then - Lwt.return_unit - else ( - let%lwt () = read_next r in - read_until r cond + Printf.eprintf "refill n=%d\n%!" n; + if n = 0 && self.buf_len = 0 then ( + self.eof <- true; + Eof + ) else if n = 0 then ( + let n_read = min len self.buf_len in + Bytes.blit self.buf_split 0 buf i_buf n_read; + shift_left_ self n_read; + Read n_read + ) else ( + self.buf_len <- self.buf_len + n; + read_chunk_ self buf i_buf len ) - - let read_line r = - let delim = "\r\n" in - let%lwt () = read_until r (fun () -> buffer_contains r delim) in - match Stringext.cut r.buffer ~on:delim with - | None -> assert false - | Some (line, next) -> - r.buffer <- next; - Lwt.return (line ^ delim) + ) else ( + Printf.eprintf "normal path buflen=%d buf=%S\n%!" self.buf_len + (Bytes.sub_string self.buf_split 0 self.buf_len); + try + let i = ref 0 in + let end_pos = min len self.buf_len - 2 - String.length self.delim in + while !i <= end_pos do + Printf.eprintf "at %d\n%!" !i; + if + Bytes.unsafe_get self.buf_split !i = '-' + && Bytes.unsafe_get self.buf_split (!i + 1) = '-' + && Utils_.string_eq + ~a:(Bytes.unsafe_to_string self.buf_split) + ~a_start:(!i + 2) ~b:self.delim ~len:(String.length self.delim) + then + raise_notrace (Found_delim !i); + incr i + done; + let n_read = min !i len in + Bytes.blit self.buf_split 0 buf i_buf n_read; + shift_left_ self n_read; + Read n_read + with + | Found_delim 0 -> + Printf.eprintf "found delim at 0\n%!"; + shift_left_ self (2 + String.length self.delim); + Delim + | Found_delim n -> + Printf.eprintf "found delim at %d\n%!" n; + let n_read = min n len in + Bytes.blit self.buf_split 0 buf i_buf n_read; + shift_left_ self n_read; + Read n_read + ) + +module Private_ = struct + type nonrec chunk = chunk = Delim | Eof | Read of int + + let read_chunk_ = read_chunk_ end - -let read_headers reader = - let rec go headers = - let%lwt line = Reader.read_line reader in - if line = "\r\n" then - Lwt.return headers - else ( - let header = parse_header line in - go (header :: headers) - ) - in - go [] - -let rec compute_case reader boundary = - match%lwt Reader.read_chunk reader with - | None -> Lwt.return `Empty - | Some line -> - (match Stringext.cut line ~on:(boundary ^ "\r\n") with - | Some (pre, post) -> Lwt.return @@ `Boundary (pre, post) - | None -> - (match Stringext.cut line ~on:(boundary ^ "--\r\n") with - | Some (pre, post) -> Lwt.return @@ `Boundary (pre, post) - | None -> - (match find_common_idx line boundary with - | Some 0 -> - Reader.unread reader line; - let%lwt () = Reader.read_next reader in - compute_case reader boundary - | Some amb_idx -> - let unambiguous = String.sub line 0 amb_idx in - let ambiguous = - String.sub line amb_idx (String.length line - amb_idx) - in - Lwt.return @@ `May_end_with_boundary (unambiguous, ambiguous) - | None -> Lwt.return @@ `App_data line))) - -let iter_part reader boundary callback = - let fin = ref false in - let last () = - fin := true; - Lwt.return_unit - in - let handle ~send ~unread ~finish = - let%lwt () = callback send in - Reader.unread reader unread; - if finish then - last () - else - Lwt.return_unit - in - while%lwt not !fin do - let%lwt res = compute_case reader boundary in - match res with - | `Empty -> last () - | `Boundary (pre, post) -> handle ~send:pre ~unread:post ~finish:true - | `May_end_with_boundary (unambiguous, ambiguous) -> - handle ~send:unambiguous ~unread:ambiguous ~finish:false - | `App_data line -> callback line - done - -let read_file_part reader boundary callback = iter_part reader boundary callback - -let strip_crlf s = - if ends_with ~suffix:"\r\n" ~suffix_length:2 s then - String.sub s 0 (String.length s - 2) - else - s - -let read_string_part reader boundary = - let value = Buffer.create 0 in - let append_to_value line = Lwt.return (Buffer.add_string value line) in - let%lwt () = iter_part reader boundary append_to_value in - Lwt.return @@ strip_crlf (Buffer.contents value) - -let read_part reader boundary callback fields = - let%lwt headers = read_headers reader in - let content_disposition = List.assoc "Content-Disposition" headers in - let name = - match parse_name content_disposition with - | Some x -> x - | None -> invalid_arg "handle_multipart" - in - match parse_filename content_disposition with - | Some filename -> read_file_part reader boundary (callback ~name ~filename) - | None -> - let%lwt value = read_string_part reader boundary in - fields := (name, value) :: !fields; - Lwt.return_unit - -let handle_multipart reader boundary callback = - let fields = (ref [] : (string * string) list ref) in - let%lwt () = - let%lwt _dummyline = Reader.read_line reader in - let fin = ref false in - while%lwt not !fin do - if%lwt Reader.empty reader then - Lwt.return (fin := true) - else - read_part reader boundary callback fields - done - in - Lwt.return !fields - -let parse ~stream ~content_type ~callback = - let reader = Reader.make stream in - let boundary = - match extract_boundary content_type with - | Some s -> "--" ^ s - | None -> invalid_arg "iter_multipart" - in - handle_multipart reader boundary callback diff --git a/src/multipart_form/tiny_httpd_multipart_form_data.mli b/src/multipart_form/tiny_httpd_multipart_form_data.mli index 557324e7..985154ac 100644 --- a/src/multipart_form/tiny_httpd_multipart_form_data.mli +++ b/src/multipart_form/tiny_httpd_multipart_form_data.mli @@ -1,33 +1,11 @@ -val align : string Lwt_stream.t -> string -> string Lwt_stream.t Lwt_stream.t -(** - Align a stream on a particular sequence and remove these boundaries. - *) +type st -type stream_part +val create : ?buf_size:int -> delim:string -> #Iostream.In.t -> st -val s_part_name : stream_part -> string -val s_part_body : stream_part -> string Lwt_stream.t -val s_part_filename : stream_part -> string option +(**/*) +module Private_ : sig + type chunk = Delim | Eof | Read of int -val parse_stream : - stream:string Lwt_stream.t -> - content_type:string -> - stream_part Lwt_stream.t Lwt.t - -type file - -val file_name : file -> string -val file_content_type : file -> string -val file_stream : file -> string Lwt_stream.t - -module StringMap : Map.S with type key = string - -val get_parts : - stream_part Lwt_stream.t -> - [ `String of string | `File of file ] StringMap.t Lwt.t - -val parse : - stream:string Lwt_stream.t -> - content_type:string -> - callback:(name:string -> filename:string -> string -> unit Lwt.t) -> - (string * string) list Lwt.t + val read_chunk_ : st -> bytes -> int -> int -> chunk +end +(**/*) diff --git a/src/multipart_form/utils_.ml b/src/multipart_form/utils_.ml index 4d1c5f56..6d56e698 100644 --- a/src/multipart_form/utils_.ml +++ b/src/multipart_form/utils_.ml @@ -1,13 +1,18 @@ -module StringMap = Map.Make (String) +(* module StringMap = Map.Make (String) *) -let string_eq ~a ~a_start ~b ~len = - let r = ref true in - for i = 0 to len - 1 do - let a_i = a_start + i in - let b_i = i in - if a.[a_i] <> b.[b_i] then r := false - done; - !r +let string_eq ~a ~a_start ~b ~len : bool = + assert (len <= String.length b); + if String.length a >= a_start + len then ( + try + for i = 0 to len - 1 do + let a_i = a_start + i in + if String.unsafe_get a a_i <> String.unsafe_get b i then + raise_notrace Exit + done; + true + with Exit -> false + ) else + false let ends_with ~suffix ~suffix_length s = let s_length = String.length s in @@ -37,6 +42,7 @@ let find_common_idx a b = in go (String.length b) +(* let[@inline] word = function | "" -> [] | w -> [ Some w ] @@ -60,3 +66,4 @@ let split_and_process_string ~boundary s = | Some w -> `Word w in List.map f @@ split_on_string ~pattern:boundary s + *) diff --git a/tests/multipart_form/dune b/tests/multipart_form/dune new file mode 100644 index 00000000..3590e207 --- /dev/null +++ b/tests/multipart_form/dune @@ -0,0 +1,4 @@ + +(tests + (names t1) + (libraries tiny_httpd tiny_httpd.multipart-form-data)) diff --git a/tests/multipart_form/t1.expected b/tests/multipart_form/t1.expected new file mode 100644 index 00000000..e69de29b diff --git a/tests/multipart_form/t1.ml b/tests/multipart_form/t1.ml new file mode 100644 index 00000000..f68edec6 --- /dev/null +++ b/tests/multipart_form/t1.ml @@ -0,0 +1,45 @@ +module MFD = Tiny_httpd_multipart_form_data + +let pf = Printf.printf + +let read_stream (st : MFD.st) : _ list = + let l = ref [] in + let buf = Bytes.create 12 in + let buffer = Buffer.create 32 in + let rec loop () = + match MFD.Private_.read_chunk_ st buf 0 (Bytes.length buf) with + | Delim -> + if Buffer.length buffer > 0 then l := `Str (Buffer.contents buffer) :: !l; + Buffer.clear buffer; + l := `Delim :: !l; + loop () + | Read n -> + Buffer.add_subbytes buffer buf 0 n; + loop () + | Eof -> + if Buffer.length buffer > 0 then l := `Str (Buffer.contents buffer) :: !l; + List.rev !l + in + loop () + +let test input_str = + let st = + MFD.create ~buf_size:16 ~delim:"YOLO" (Iostream.In.of_string input_str) + in + let chunks = read_stream st in + List.iter + (function + | `Delim -> pf "delim\n" + | `Str s -> pf "chunk %S\n" s) + chunks; + () + +let () = + pf "T1\n"; + test + {|hello--YOLO + world + what is the meaning of--YOLOthis??--YOLOok ok ok--YOLO|}; + pf "T2\n"; + test "--YOLO--YOLOah bon--YOLOaight--YOLO--YOLO"; + () From bde09435b4d5b8d03c3d3ba3f58c2f56d1b53755 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Mon, 2 Dec 2024 11:48:33 -0500 Subject: [PATCH 04/17] more test --- tests/multipart_form/t1.expected | 25 +++++++++++++++++++++++++ tests/multipart_form/t1.ml | 5 +++++ 2 files changed, 30 insertions(+) diff --git a/tests/multipart_form/t1.expected b/tests/multipart_form/t1.expected index e69de29b..9591feee 100644 --- a/tests/multipart_form/t1.expected +++ b/tests/multipart_form/t1.expected @@ -0,0 +1,25 @@ +T1 +chunk "hello" +delim +chunk "\n world\n what is the meaning of" +delim +chunk "this??" +delim +chunk "ok ok ok" +delim +T2 +delim +delim +chunk "ah bon" +delim +chunk "aight" +delim +delim +T3 +delim +chunk "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" +delim +delim +chunk "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb" +delim +chunk "cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc" diff --git a/tests/multipart_form/t1.ml b/tests/multipart_form/t1.ml index f68edec6..221bde28 100644 --- a/tests/multipart_form/t1.ml +++ b/tests/multipart_form/t1.ml @@ -1,5 +1,6 @@ module MFD = Tiny_httpd_multipart_form_data +let spf = Printf.sprintf let pf = Printf.printf let read_stream (st : MFD.st) : _ list = @@ -42,4 +43,8 @@ let () = what is the meaning of--YOLOthis??--YOLOok ok ok--YOLO|}; pf "T2\n"; test "--YOLO--YOLOah bon--YOLOaight--YOLO--YOLO"; + pf "T3\n"; + test + (spf "--YOLO%s--YOLO--YOLO%s--YOLO%s" (String.make 400 'a') + (String.make 400 'b') (String.make 400 'c')); () From e1bfe709912a625d035f88a980f7daa84a5fe907 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Mon, 2 Dec 2024 14:19:06 -0500 Subject: [PATCH 05/17] feat headers: expose parsing helper --- src/core/headers.ml | 29 +++++++++++++++++++---------- src/core/headers.mli | 4 ++++ 2 files changed, 23 insertions(+), 10 deletions(-) diff --git a/src/core/headers.ml b/src/core/headers.ml index 1c1d8f87..19f06a3d 100644 --- a/src/core/headers.ml +++ b/src/core/headers.ml @@ -46,6 +46,21 @@ let for_all pred s = true with Exit -> false +let parse_line_ (line : string) : _ result = + try + let i = + try String.index line ':' + with Not_found -> failwith "invalid header, missing ':'" + in + let k = String.sub line 0 i in + if not (for_all is_tchar k) then + failwith (Printf.sprintf "Invalid header key: %S" k); + let v = + String.sub line (i + 1) (String.length line - i - 1) |> String.trim + in + Ok (k, v) + with Failure msg -> Error msg + let parse_ ~(buf : Buf.t) (bs : IO.Input.t) : t = let rec loop acc = match IO.Input.read_line_using_opt ~buf bs with @@ -56,16 +71,10 @@ let parse_ ~(buf : Buf.t) (bs : IO.Input.t) : t = bad_reqf 400 "bad header line, not ended in CRLF" | Some line -> let k, v = - try - let i = String.index line ':' in - let k = String.sub line 0 i in - if not (for_all is_tchar k) then - invalid_arg (Printf.sprintf "Invalid header key: %S" k); - let v = - String.sub line (i + 1) (String.length line - i - 1) |> String.trim - in - k, v - with _ -> bad_reqf 400 "invalid header line: %S" line + match parse_line_ line with + | Ok r -> r + | Error msg -> + bad_reqf 400 "invalid header line: %s\nline is: %S" msg line in loop ((String.lowercase_ascii k, v) :: acc) in diff --git a/src/core/headers.mli b/src/core/headers.mli index b46b5d54..67feb9a8 100644 --- a/src/core/headers.mli +++ b/src/core/headers.mli @@ -33,3 +33,7 @@ val pp : Format.formatter -> t -> unit (** Pretty print the headers. *) val parse_ : buf:Buf.t -> IO.Input.t -> t +(**/*) + +val parse_line_ : string -> (string * string, string) result +(**/*) From c966d1839c2331917408424ce3e5427b340fc9ed Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Mon, 2 Dec 2024 14:19:26 -0500 Subject: [PATCH 06/17] feat multipart: first ok implementation --- .../tiny_httpd_multipart_form_data.ml | 212 +++++++++++++----- .../tiny_httpd_multipart_form_data.mli | 10 +- 2 files changed, 170 insertions(+), 52 deletions(-) diff --git a/src/multipart_form/tiny_httpd_multipart_form_data.ml b/src/multipart_form/tiny_httpd_multipart_form_data.ml index ff2d3050..6529f0e6 100644 --- a/src/multipart_form/tiny_httpd_multipart_form_data.ml +++ b/src/multipart_form/tiny_httpd_multipart_form_data.ml @@ -1,99 +1,209 @@ (* ported from https://github.com/cryptosense/multipart-form-data . *) open Tiny_httpd +module Slice = Iostream.Slice + +let spf = Printf.sprintf + +type buf = { bs: bytes; mutable len: int } + +let shift_left_ (self : buf) n = + if n = self.len then + self.len <- 0 + else ( + assert (n < self.len); + Bytes.blit self.bs n self.bs 0 (self.len - n); + self.len <- self.len - n + ) + +let[@inline] buf_full (self : buf) : bool = self.len >= Bytes.length self.bs + +type slice = Iostream.Slice.t +type event = Part of Tiny_httpd.Headers.t | Read of slice | End_of_input +type out_state = Begin | Inside_part | Eof type st = { - delim: string; + boundary: string; ic: Iostream.In.t; - buf_split: bytes; (** Used to split on the delimiter *) - mutable buf_len: int; - buf_line: Buf.t; - mutable eof: bool; + buf: buf; (** Used to split on the boundary *) + mutable eof_split: bool; + buf_out: buf; (** Used to return output slices *) + mutable st_out: out_state; } -let create ?(buf_size = 64 * 1024) ~delim ic : st = +let create ?(buf_size = 64 * 1024) ?(out_buf_size = 8 * 1024) ~boundary ic : st + = let ic = (ic : #Iostream.In.t :> Iostream.In.t) in { - delim; + boundary; ic; - buf_split = Bytes.create buf_size; - buf_len = 0; - buf_line = Buf.create ~size:256 (); - eof = false; + buf = { bs = Bytes.create buf_size; len = 0 }; + eof_split = false; + buf_out = { bs = Bytes.create out_buf_size; len = 0 }; + st_out = Begin; } type chunk = Delim | Eof | Read of int -let[@inline] min_len_ (self : st) : int = 2 + String.length self.delim +let[@inline] min_len_ (self : st) : int = 4 + String.length self.boundary -let shift_left_ (self : st) n = - if n = self.buf_len then - self.buf_len <- 0 - else ( - assert (n < self.buf_len); - Bytes.blit self.buf_split n self.buf_split 0 (self.buf_len - n); - self.buf_len <- self.buf_len - n - ) - -exception Found_delim of int +exception Found_boundary of int let rec read_chunk_ (self : st) buf i_buf len : chunk = - if self.eof then + if self.eof_split then Eof - else if self.buf_len < min_len_ self then ( + else if self.buf.len < min_len_ self then ( (* try to refill buffer *) let n = - Iostream.In.input self.ic self.buf_split self.buf_len - (Bytes.length self.buf_split - self.buf_len) + Iostream.In.input self.ic self.buf.bs self.buf.len + (Bytes.length self.buf.bs - self.buf.len) in - Printf.eprintf "refill n=%d\n%!" n; - if n = 0 && self.buf_len = 0 then ( - self.eof <- true; + if n = 0 && self.buf.len = 0 then ( + self.eof_split <- true; Eof ) else if n = 0 then ( - let n_read = min len self.buf_len in - Bytes.blit self.buf_split 0 buf i_buf n_read; - shift_left_ self n_read; + let n_read = min len self.buf.len in + Bytes.blit self.buf.bs 0 buf i_buf n_read; + shift_left_ self.buf n_read; Read n_read ) else ( - self.buf_len <- self.buf_len + n; + self.buf.len <- self.buf.len + n; read_chunk_ self buf i_buf len ) ) else ( - Printf.eprintf "normal path buflen=%d buf=%S\n%!" self.buf_len - (Bytes.sub_string self.buf_split 0 self.buf_len); try let i = ref 0 in - let end_pos = min len self.buf_len - 2 - String.length self.delim in + let end_pos = min len self.buf.len - 4 - String.length self.boundary in while !i <= end_pos do - Printf.eprintf "at %d\n%!" !i; if - Bytes.unsafe_get self.buf_split !i = '-' - && Bytes.unsafe_get self.buf_split (!i + 1) = '-' + Bytes.unsafe_get self.buf.bs !i = '\r' + && Bytes.unsafe_get self.buf.bs (!i + 1) = '\n' + && Bytes.unsafe_get self.buf.bs (!i + 2) = '-' + && Bytes.unsafe_get self.buf.bs (!i + 3) = '-' && Utils_.string_eq - ~a:(Bytes.unsafe_to_string self.buf_split) - ~a_start:(!i + 2) ~b:self.delim ~len:(String.length self.delim) + ~a:(Bytes.unsafe_to_string self.buf.bs) + ~a_start:(!i + 4) ~b:self.boundary + ~len:(String.length self.boundary) then - raise_notrace (Found_delim !i); + raise_notrace (Found_boundary !i); incr i done; let n_read = min !i len in - Bytes.blit self.buf_split 0 buf i_buf n_read; - shift_left_ self n_read; + Bytes.blit self.buf.bs 0 buf i_buf n_read; + shift_left_ self.buf n_read; Read n_read with - | Found_delim 0 -> - Printf.eprintf "found delim at 0\n%!"; - shift_left_ self (2 + String.length self.delim); + | Found_boundary 0 -> + shift_left_ self.buf (4 + String.length self.boundary); Delim - | Found_delim n -> - Printf.eprintf "found delim at %d\n%!" n; + | Found_boundary n -> let n_read = min n len in - Bytes.blit self.buf_split 0 buf i_buf n_read; - shift_left_ self n_read; + Bytes.blit self.buf.bs 0 buf i_buf n_read; + shift_left_ self.buf n_read; Read n_read ) +exception Found of int + +(** Find \r\n *) +let find_crlf_exn (buf : buf) : int = + try + for i = 0 to buf.len - 2 do + if + Bytes.unsafe_get buf.bs i = '\r' + && Bytes.unsafe_get buf.bs (i + 1) = '\n' + then + raise_notrace (Found i) + done; + raise Not_found + with Found i -> i + +let[@inline] read_to_buf_out_ (self : st) = + assert (not (buf_full self.buf_out)); + read_chunk_ self self.buf_out.bs self.buf_out.len + (Bytes.length self.buf_out.bs - self.buf_out.len) + +let read_data_or_fail_ (self : st) : unit = + match read_to_buf_out_ self with + | Delim -> failwith "multipart: unexpected boundary while parsing headers" + | Eof -> failwith "multipart: unexpected EOF while parsing headers" + | Read n -> self.buf_out.len <- self.buf_out.len + n + +let rec next (self : st) : event = + match self.st_out with + | Eof -> End_of_input + | Inside_part when self.buf_out.len > 0 -> + (* there's data to return *) + let sl = + { Slice.bytes = self.buf_out.bs; off = 0; len = self.buf_out.len } + in + self.buf_out.len <- 0; + Read sl + | Inside_part -> + (* refill or reach boundary *) + (match read_to_buf_out_ self with + | Eof -> + self.st_out <- Eof; + End_of_input + | Delim -> parse_after_boundary self + | Read n -> + self.buf_out.len <- n; + next self) + | Begin -> + (match read_to_buf_out_ self with + | Delim -> parse_after_boundary self + | Eof -> + self.st_out <- Eof; + End_of_input + | Read _ -> failwith "multipart: expected boundary, got data") + +and parse_after_boundary (self : st) : event = + while self.buf_out.len < 2 do + read_data_or_fail_ self + done; + + let after_boundary = Bytes.sub_string self.buf_out.bs 0 2 in + shift_left_ self.buf_out 2; + match after_boundary with + | "--" -> + self.st_out <- Eof; + End_of_input + | "\r\n" -> + let headers = parse_headers_rec self [] in + self.st_out <- Inside_part; + Part headers + | s -> + failwith (spf "multipart: expect '--' or '\r\n' after boundary, got %S" s) + +and parse_headers_rec (self : st) acc : Headers.t = + if self.buf_out.len = 0 then ( + read_data_or_fail_ self; + parse_headers_rec self acc + ) else ( + match find_crlf_exn self.buf_out with + | exception Not_found -> + if buf_full self.buf_out then + failwith "multipart: header line is too long" + else ( + read_data_or_fail_ self; + parse_headers_rec self acc + ) + | i -> + let line = Bytes.sub_string self.buf_out.bs 0 i in + Printf.eprintf "parse header line %S\n%!" line; + shift_left_ self.buf_out (i + 2); + if line = "" then + List.rev acc + else ( + match Tiny_httpd.Headers.parse_line_ line with + | Ok (k, v) -> + parse_headers_rec self ((String.lowercase_ascii k, v) :: acc) + | Error msg -> + failwith + (spf "multipart: failed to parser header: %s\nline: %S" msg line) + ) + ) + module Private_ = struct type nonrec chunk = chunk = Delim | Eof | Read of int diff --git a/src/multipart_form/tiny_httpd_multipart_form_data.mli b/src/multipart_form/tiny_httpd_multipart_form_data.mli index 985154ac..009dbc47 100644 --- a/src/multipart_form/tiny_httpd_multipart_form_data.mli +++ b/src/multipart_form/tiny_httpd_multipart_form_data.mli @@ -1,6 +1,14 @@ +(** Parser for multipart/form-data *) type st +(** Parser state *) -val create : ?buf_size:int -> delim:string -> #Iostream.In.t -> st +val create : + ?buf_size:int -> ?out_buf_size:int -> boundary:string -> #Iostream.In.t -> st + +type slice = Iostream.Slice.t +type event = Part of Tiny_httpd.Headers.t | Read of slice | End_of_input + +val next : st -> event (**/*) module Private_ : sig From 3f3716164903130f4cc2edfd28c908491b07090d Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Mon, 2 Dec 2024 14:19:40 -0500 Subject: [PATCH 07/17] test: more tests for multipart form data --- tests/multipart_form/dune | 2 +- tests/multipart_form/t1.expected | 2 +- tests/multipart_form/t1.ml | 18 +++++--- tests/multipart_form/t2.expected | 12 ++++++ tests/multipart_form/t2.ml | 74 ++++++++++++++++++++++++++++++++ 5 files changed, 99 insertions(+), 9 deletions(-) create mode 100644 tests/multipart_form/t2.expected create mode 100644 tests/multipart_form/t2.ml diff --git a/tests/multipart_form/dune b/tests/multipart_form/dune index 3590e207..e45df39e 100644 --- a/tests/multipart_form/dune +++ b/tests/multipart_form/dune @@ -1,4 +1,4 @@ (tests - (names t1) + (names t1 t2) (libraries tiny_httpd tiny_httpd.multipart-form-data)) diff --git a/tests/multipart_form/t1.expected b/tests/multipart_form/t1.expected index 9591feee..97b413f8 100644 --- a/tests/multipart_form/t1.expected +++ b/tests/multipart_form/t1.expected @@ -20,6 +20,6 @@ delim chunk "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" delim delim -chunk "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb" +chunk "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb" delim chunk "cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc" diff --git a/tests/multipart_form/t1.ml b/tests/multipart_form/t1.ml index 221bde28..346803ed 100644 --- a/tests/multipart_form/t1.ml +++ b/tests/multipart_form/t1.ml @@ -25,7 +25,7 @@ let read_stream (st : MFD.st) : _ list = let test input_str = let st = - MFD.create ~buf_size:16 ~delim:"YOLO" (Iostream.In.of_string input_str) + MFD.create ~buf_size:16 ~boundary:"YOLO" (Iostream.In.of_string input_str) in let chunks = read_stream st in List.iter @@ -38,13 +38,17 @@ let test input_str = let () = pf "T1\n"; test - {|hello--YOLO - world - what is the meaning of--YOLOthis??--YOLOok ok ok--YOLO|}; + "hello\r\n\ + --YOLO\n\ + \ world\n\ + \ what is the meaning of\r\n\ + --YOLOthis??\r\n\ + --YOLOok ok ok\r\n\ + --YOLO"; pf "T2\n"; - test "--YOLO--YOLOah bon--YOLOaight--YOLO--YOLO"; + test "\r\n--YOLO\r\n--YOLOah bon\r\n--YOLOaight\r\n--YOLO\r\n--YOLO"; pf "T3\n"; test - (spf "--YOLO%s--YOLO--YOLO%s--YOLO%s" (String.make 400 'a') - (String.make 400 'b') (String.make 400 'c')); + (spf "\r\n--YOLO%s\r\n--YOLO\r\n--YOLO%s\r\n--YOLO%s" (String.make 400 'a') + (String.make 512 'b') (String.make 400 'c')); () diff --git a/tests/multipart_form/t2.expected b/tests/multipart_form/t2.expected new file mode 100644 index 00000000..9a1bd7dd --- /dev/null +++ b/tests/multipart_form/t2.expected @@ -0,0 +1,12 @@ +T1 +part ["some-super-cool":"header here";"ohlook":"here"] +chunk "and now for the b-o-d-y \240\159\145\143\n" +part ["more":"headers"] +chunk "and another body\r\n" +end of input +T1 +part ["some-super-cool":"header here";"ohlook":"here"] +chunk "and now for the bigger body:\naaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\n" +part ["more":"headers"] +chunk "and another body" +end of input diff --git a/tests/multipart_form/t2.ml b/tests/multipart_form/t2.ml new file mode 100644 index 00000000..955968b5 --- /dev/null +++ b/tests/multipart_form/t2.ml @@ -0,0 +1,74 @@ +module MFD = Tiny_httpd_multipart_form_data + +let spf = Printf.sprintf +let pf = Printf.printf + +let read_stream (st : MFD.st) : _ list = + let l = ref [] in + let buffer = Buffer.create 32 in + let rec loop () = + match MFD.next st with + | Part headers -> + if Buffer.length buffer > 0 then l := `Str (Buffer.contents buffer) :: !l; + Buffer.clear buffer; + l := `Part headers :: !l; + loop () + | Read sl -> + Buffer.add_subbytes buffer sl.bytes sl.off sl.len; + loop () + | End_of_input -> + if Buffer.length buffer > 0 then l := `Str (Buffer.contents buffer) :: !l; + l := `End_of_input :: !l; + List.rev !l + in + loop () + +let test input_str = + let st = + MFD.create ~buf_size:16 ~boundary:"YOLO" (Iostream.In.of_string input_str) + in + let chunks = read_stream st in + List.iter + (function + | `End_of_input -> pf "end of input\n" + | `Part hs -> + pf "part [%s]\n" + (String.concat ";" @@ List.map (fun (k, v) -> spf "%S:%S" k v) hs) + | `Str s -> pf "chunk %S\n" s) + chunks; + () + +let () = + pf "T1\n"; + test + "\r\n\ + --YOLO\r\n\ + some-super-cool: header here\r\n\ + ohlook: here\r\n\ + \r\n\ + and now for the b-o-d-y 👏\n\ + \r\n\ + --YOLO\r\n\ + more: headers\r\n\ + \r\n\ + and another body\r\n\ + \r\n\ + --YOLO--"; + pf "T1\n"; + test + (spf + "\r\n\ + --YOLO\r\n\ + some-super-cool: header here\r\n\ + ohlook: here\r\n\ + \r\n\ + and now for the bigger body:\n\ + %s\n\ + \r\n\ + --YOLO\r\n\ + more: headers\r\n\ + \r\n\ + and another body\r\n\ + --YOLO--" + (String.make 500 'a')); + () From a5a06f0159b29d2f8aecb83acacb423f96fc9121 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Mon, 2 Dec 2024 14:45:26 -0500 Subject: [PATCH 08/17] feat multipart: add helper to parse boundary --- .../tiny_httpd_multipart_form_data.ml | 24 ++++++++ .../tiny_httpd_multipart_form_data.mli | 3 + src/multipart_form/utils_.ml | 59 ++----------------- 3 files changed, 31 insertions(+), 55 deletions(-) diff --git a/src/multipart_form/tiny_httpd_multipart_form_data.ml b/src/multipart_form/tiny_httpd_multipart_form_data.ml index 6529f0e6..315b35e0 100644 --- a/src/multipart_form/tiny_httpd_multipart_form_data.ml +++ b/src/multipart_form/tiny_httpd_multipart_form_data.ml @@ -204,6 +204,30 @@ and parse_headers_rec (self : st) acc : Headers.t = ) ) +let parse_content_type (hs : Tiny_httpd.Headers.t) : _ option = + match Tiny_httpd.Headers.get "content-type" hs with + | None -> None + | Some s -> + (match String.split_on_char ';' s with + | "multipart/form-data" :: tl -> + let boundary = ref None in + List.iter + (fun s -> + match Utils_.split1_on ~c:'=' @@ String.trim s with + | Some ("boundary", "") -> () + | Some ("boundary", s) -> + let s = + if s.[0] = '"' && s.[String.length s - 1] = '"' then + String.sub s 1 (String.length s - 2) + else + s + in + boundary := Some (`boundary s) + | _ -> ()) + tl; + !boundary + | _ -> None) + module Private_ = struct type nonrec chunk = chunk = Delim | Eof | Read of int diff --git a/src/multipart_form/tiny_httpd_multipart_form_data.mli b/src/multipart_form/tiny_httpd_multipart_form_data.mli index 009dbc47..a1747e5d 100644 --- a/src/multipart_form/tiny_httpd_multipart_form_data.mli +++ b/src/multipart_form/tiny_httpd_multipart_form_data.mli @@ -5,6 +5,9 @@ type st val create : ?buf_size:int -> ?out_buf_size:int -> boundary:string -> #Iostream.In.t -> st +val parse_content_type : Tiny_httpd.Headers.t -> [ `boundary of string ] option +(** Parse headers for [content-type: multipart/form-data; boundary=…] *) + type slice = Iostream.Slice.t type event = Part of Tiny_httpd.Headers.t | Read of slice | End_of_input diff --git a/src/multipart_form/utils_.ml b/src/multipart_form/utils_.ml index 6d56e698..a1f993ce 100644 --- a/src/multipart_form/utils_.ml +++ b/src/multipart_form/utils_.ml @@ -1,5 +1,3 @@ -(* module StringMap = Map.Make (String) *) - let string_eq ~a ~a_start ~b ~len : bool = assert (len <= String.length b); if String.length a >= a_start + len then ( @@ -14,56 +12,7 @@ let string_eq ~a ~a_start ~b ~len : bool = ) else false -let ends_with ~suffix ~suffix_length s = - let s_length = String.length s in - s_length >= suffix_length - && string_eq ~a:s ~a_start:(s_length - suffix_length) ~b:suffix - ~len:suffix_length - -let rec first_matching p = function - | [] -> None - | x :: xs -> - (match p x with - | Some y -> Some y - | None -> first_matching p xs) - -let[@inline] option_map f = function - | None -> None - | Some x -> Some (f x) - -let find_common_idx a b = - let rec go i = - if i <= 0 then - None - else if ends_with ~suffix:b ~suffix_length:i a then - Some (String.length a - i) - else - go (i - 1) - in - go (String.length b) - -(* -let[@inline] word = function - | "" -> [] - | w -> [ Some w ] - -let split_on_string ~pattern s = - let pattern_length = String.length pattern in - let rec go start acc = - match Stringext.find_from ~start s ~pattern with - | Some match_start -> - let before = String.sub s start (match_start - start) in - let new_acc = (None :: word before) @ acc in - let new_start = match_start + pattern_length in - go new_start new_acc - | None -> word (Stringext.string_after s start) @ acc - in - List.rev (go 0 []) - -let split_and_process_string ~boundary s = - let f = function - | None -> `Delim - | Some w -> `Word w - in - List.map f @@ split_on_string ~pattern:boundary s - *) +let split1_on ~c s = + match String.index s c with + | exception Not_found -> None + | i -> Some (String.sub s 0 i, String.sub s (i + 1) (String.length s - i - 1)) From 66f87b7bdac80b4cab55c9998317700419f72357 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Mon, 2 Dec 2024 14:45:41 -0500 Subject: [PATCH 09/17] more tests --- tests/multipart_form/dune | 5 ++- .../{t1.expected => t_chunk.expected} | 0 tests/multipart_form/{t1.ml => t_chunk.ml} | 0 tests/multipart_form/t_content_type.expected | 3 ++ tests/multipart_form/t_content_type.ml | 32 +++++++++++++++++++ .../{t2.expected => t_parse.expected} | 0 tests/multipart_form/{t2.ml => t_parse.ml} | 0 7 files changed, 37 insertions(+), 3 deletions(-) rename tests/multipart_form/{t1.expected => t_chunk.expected} (100%) rename tests/multipart_form/{t1.ml => t_chunk.ml} (100%) create mode 100644 tests/multipart_form/t_content_type.expected create mode 100644 tests/multipart_form/t_content_type.ml rename tests/multipart_form/{t2.expected => t_parse.expected} (100%) rename tests/multipart_form/{t2.ml => t_parse.ml} (100%) diff --git a/tests/multipart_form/dune b/tests/multipart_form/dune index e45df39e..48e31cbb 100644 --- a/tests/multipart_form/dune +++ b/tests/multipart_form/dune @@ -1,4 +1,3 @@ - (tests - (names t1 t2) - (libraries tiny_httpd tiny_httpd.multipart-form-data)) + (names t_chunk t_parse t_content_type) + (libraries tiny_httpd tiny_httpd.multipart-form-data)) diff --git a/tests/multipart_form/t1.expected b/tests/multipart_form/t_chunk.expected similarity index 100% rename from tests/multipart_form/t1.expected rename to tests/multipart_form/t_chunk.expected diff --git a/tests/multipart_form/t1.ml b/tests/multipart_form/t_chunk.ml similarity index 100% rename from tests/multipart_form/t1.ml rename to tests/multipart_form/t_chunk.ml diff --git a/tests/multipart_form/t_content_type.expected b/tests/multipart_form/t_content_type.expected new file mode 100644 index 00000000..4f4a6a83 --- /dev/null +++ b/tests/multipart_form/t_content_type.expected @@ -0,0 +1,3 @@ +h: ["content-type": "yolo";"other": "whatev"], no content type +h ["content-type": "multipart/form-data; boundary=helloworld; junk";"other": "whatev"]: got "helloworld", expected "helloworld", same=true +h ["content-type": "multipart/form-data; lol=mdr; boundary=\"some quoted boundary\""]: got "some quoted boundary", expected "some quoted boundary", same=true diff --git a/tests/multipart_form/t_content_type.ml b/tests/multipart_form/t_content_type.ml new file mode 100644 index 00000000..9159ba45 --- /dev/null +++ b/tests/multipart_form/t_content_type.ml @@ -0,0 +1,32 @@ +module MFD = Tiny_httpd_multipart_form_data + +let pf = Printf.printf +let spf = Printf.sprintf + +let pp_headers hs = + spf "[%s]" (String.concat ";" @@ List.map (fun (k, v) -> spf "%S: %S" k v) hs) + +let test_headers h (exp : string option) = + match MFD.parse_content_type h, exp with + | Some (`boundary c1), Some c2 -> + pf "h %s: got %S, expected %S, same=%b\n" (pp_headers h) c1 c2 (c1 = c2) + | Some (`boundary c1), None -> + pf "h: %s, unexpected content type %S\n" (pp_headers h) c1 + | None, Some c2 -> pf "h: %s, expected content type %S\n" (pp_headers h) c2 + | None, None -> pf "h: %s, no content type\n" (pp_headers h) + +let () = + test_headers [ "content-type", "yolo"; "other", "whatev" ] None; + test_headers + [ + "content-type", "multipart/form-data; boundary=helloworld; junk"; + "other", "whatev"; + ] + (Some "helloworld"); + test_headers + [ + ( "content-type", + "multipart/form-data; lol=mdr; boundary=\"some quoted boundary\"" ); + ] + (Some "some quoted boundary"); + () diff --git a/tests/multipart_form/t2.expected b/tests/multipart_form/t_parse.expected similarity index 100% rename from tests/multipart_form/t2.expected rename to tests/multipart_form/t_parse.expected diff --git a/tests/multipart_form/t2.ml b/tests/multipart_form/t_parse.ml similarity index 100% rename from tests/multipart_form/t2.ml rename to tests/multipart_form/t_parse.ml From b966a9ecccaf132878a7574acc8775edd0eac117 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Mon, 2 Dec 2024 14:56:35 -0500 Subject: [PATCH 10/17] feat multipart-form: expose content_disposition --- src/multipart_form/tiny_httpd_multipart_form_data.ml | 8 ++------ src/multipart_form/tiny_httpd_multipart_form_data.mli | 5 ++++- src/multipart_form/utils_.ml | 10 ++++++++++ 3 files changed, 16 insertions(+), 7 deletions(-) diff --git a/src/multipart_form/tiny_httpd_multipart_form_data.ml b/src/multipart_form/tiny_httpd_multipart_form_data.ml index 315b35e0..0868899b 100644 --- a/src/multipart_form/tiny_httpd_multipart_form_data.ml +++ b/src/multipart_form/tiny_httpd_multipart_form_data.ml @@ -2,6 +2,7 @@ open Tiny_httpd module Slice = Iostream.Slice +module Content_disposition = Content_disposition let spf = Printf.sprintf @@ -216,12 +217,7 @@ let parse_content_type (hs : Tiny_httpd.Headers.t) : _ option = match Utils_.split1_on ~c:'=' @@ String.trim s with | Some ("boundary", "") -> () | Some ("boundary", s) -> - let s = - if s.[0] = '"' && s.[String.length s - 1] = '"' then - String.sub s 1 (String.length s - 2) - else - s - in + let s = Utils_.remove_quotes s in boundary := Some (`boundary s) | _ -> ()) tl; diff --git a/src/multipart_form/tiny_httpd_multipart_form_data.mli b/src/multipart_form/tiny_httpd_multipart_form_data.mli index a1747e5d..dd7bbebc 100644 --- a/src/multipart_form/tiny_httpd_multipart_form_data.mli +++ b/src/multipart_form/tiny_httpd_multipart_form_data.mli @@ -1,4 +1,7 @@ -(** Parser for multipart/form-data *) +(** Streaming parser for multipart/form-data *) + +module Content_disposition = Content_disposition + type st (** Parser state *) diff --git a/src/multipart_form/utils_.ml b/src/multipart_form/utils_.ml index a1f993ce..cb3d8a5b 100644 --- a/src/multipart_form/utils_.ml +++ b/src/multipart_form/utils_.ml @@ -1,3 +1,5 @@ +let spf = Printf.sprintf + let string_eq ~a ~a_start ~b ~len : bool = assert (len <= String.length b); if String.length a >= a_start + len then ( @@ -16,3 +18,11 @@ let split1_on ~c s = match String.index s c with | exception Not_found -> None | i -> Some (String.sub s 0 i, String.sub s (i + 1) (String.length s - i - 1)) + +let remove_quotes s : string = + if String.length s < 2 then + s + else if s.[0] = '"' && s.[String.length s - 1] = '"' then + String.sub s 1 (String.length s - 2) + else + s From ce6119d4567213132c903b7a3c6c10ec9ba6c7d4 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Mon, 2 Dec 2024 14:56:45 -0500 Subject: [PATCH 11/17] and tests --- tests/multipart_form/dune | 2 +- .../t_content_disposition.expected | 3 ++ tests/multipart_form/t_content_disposition.ml | 39 +++++++++++++++++++ 3 files changed, 43 insertions(+), 1 deletion(-) create mode 100644 tests/multipart_form/t_content_disposition.expected create mode 100644 tests/multipart_form/t_content_disposition.ml diff --git a/tests/multipart_form/dune b/tests/multipart_form/dune index 48e31cbb..38e8d9f8 100644 --- a/tests/multipart_form/dune +++ b/tests/multipart_form/dune @@ -1,3 +1,3 @@ (tests - (names t_chunk t_parse t_content_type) + (names t_chunk t_parse t_content_type t_content_disposition) (libraries tiny_httpd tiny_httpd.multipart-form-data)) diff --git a/tests/multipart_form/t_content_disposition.expected b/tests/multipart_form/t_content_disposition.expected new file mode 100644 index 00000000..a44bee6c --- /dev/null +++ b/tests/multipart_form/t_content_disposition.expected @@ -0,0 +1,3 @@ +h: ["content-foobar": "yolo";"other": "whatev"], no content disp +h ["content-disposition": "form-data; name=helloworld; junk";"other": "whatev"]: got {kind="form-data"; name="helloworld"; filename=None}, expected {kind="form-data"; name="helloworld"; filename=None}, same=true +h ["content-disposition": "form-data; lol=mdr; filename=\"some quoted stuff\""]: got {kind="form-data"; name=None; filename="some quoted stuff"}, expected {kind="form-data"; name=None; filename="some quoted stuff"}, same=true diff --git a/tests/multipart_form/t_content_disposition.ml b/tests/multipart_form/t_content_disposition.ml new file mode 100644 index 00000000..355b6cf7 --- /dev/null +++ b/tests/multipart_form/t_content_disposition.ml @@ -0,0 +1,39 @@ +module MFD = Tiny_httpd_multipart_form_data + +let pf = Printf.printf +let spf = Printf.sprintf + +let pp_headers hs = + spf "[%s]" (String.concat ";" @@ List.map (fun (k, v) -> spf "%S: %S" k v) hs) + +let test_headers h (exp : _ option) = + match MFD.Content_disposition.parse h, exp with + | Some c1, Some c2 -> + pf "h %s: got %s, expected %s, same=%b\n" (pp_headers h) + (MFD.Content_disposition.to_string c1) + (MFD.Content_disposition.to_string c2) + (c1 = c2) + | Some c1, None -> + pf "h: %s, unexpected content disp %s\n" (pp_headers h) + (MFD.Content_disposition.to_string c1) + | None, Some c2 -> + pf "h: %s, expected content disp %s\n" (pp_headers h) + (MFD.Content_disposition.to_string c2) + | None, None -> pf "h: %s, no content disp\n" (pp_headers h) + +let () = + test_headers [ "content-foobar", "yolo"; "other", "whatev" ] None; + test_headers + [ + "content-disposition", "form-data; name=helloworld; junk"; + "other", "whatev"; + ] + (Some { kind = "form-data"; name = Some "helloworld"; filename = None }); + test_headers + [ + ( "content-disposition", + "form-data; lol=mdr; filename=\"some quoted stuff\"" ); + ] + (Some + { kind = "form-data"; name = None; filename = Some "some quoted stuff" }); + () From 8f0dac2dfe7c1dda766c7e0db2294458f9b8da57 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Mon, 2 Dec 2024 15:32:08 -0500 Subject: [PATCH 12/17] missing file --- src/multipart_form/content_disposition.ml | 31 +++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 src/multipart_form/content_disposition.ml diff --git a/src/multipart_form/content_disposition.ml b/src/multipart_form/content_disposition.ml new file mode 100644 index 00000000..9f545729 --- /dev/null +++ b/src/multipart_form/content_disposition.ml @@ -0,0 +1,31 @@ +open Utils_ + +type t = { kind: string; name: string option; filename: string option } + +(** Simple display *) +let to_string (self : t) = + let stropt = function + | None -> "None" + | Some s -> spf "%S" s + in + spf "{kind=%S; name=%s; filename=%s}" self.kind (stropt self.name) + (stropt self.filename) + +let parse (hs : Tiny_httpd.Headers.t) : t option = + match Tiny_httpd.Headers.get "content-disposition" hs with + | None -> None + | Some s -> + (match String.split_on_char ';' s with + | [] -> + failwith (Printf.sprintf "multipart: invalid content-disposition %S" s) + | kind :: tl -> + let name = ref None in + let filename = ref None in + List.iter + (fun s -> + match Utils_.split1_on ~c:'=' @@ String.trim s with + | Some ("name", v) -> name := Some (Utils_.remove_quotes v) + | Some ("filename", v) -> filename := Some (Utils_.remove_quotes v) + | _ -> ()) + tl; + Some { kind; name = !name; filename = !filename }) From 0b34c966f7f8b3641cda622acc70eff918be4676 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Mon, 2 Dec 2024 15:48:52 -0500 Subject: [PATCH 13/17] fix multipart: no \r\n before boundary after all --- .../tiny_httpd_multipart_form_data.ml | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/src/multipart_form/tiny_httpd_multipart_form_data.ml b/src/multipart_form/tiny_httpd_multipart_form_data.ml index 0868899b..9807ff0f 100644 --- a/src/multipart_form/tiny_httpd_multipart_form_data.ml +++ b/src/multipart_form/tiny_httpd_multipart_form_data.ml @@ -46,7 +46,7 @@ let create ?(buf_size = 64 * 1024) ?(out_buf_size = 8 * 1024) ~boundary ic : st type chunk = Delim | Eof | Read of int -let[@inline] min_len_ (self : st) : int = 4 + String.length self.boundary +let[@inline] min_len_ (self : st) : int = 2 + String.length self.boundary exception Found_boundary of int @@ -74,16 +74,14 @@ let rec read_chunk_ (self : st) buf i_buf len : chunk = ) else ( try let i = ref 0 in - let end_pos = min len self.buf.len - 4 - String.length self.boundary in + let end_pos = min len self.buf.len - 2 - String.length self.boundary in while !i <= end_pos do if - Bytes.unsafe_get self.buf.bs !i = '\r' - && Bytes.unsafe_get self.buf.bs (!i + 1) = '\n' - && Bytes.unsafe_get self.buf.bs (!i + 2) = '-' - && Bytes.unsafe_get self.buf.bs (!i + 3) = '-' + Bytes.unsafe_get self.buf.bs !i = '-' + && Bytes.unsafe_get self.buf.bs (!i + 1) = '-' && Utils_.string_eq ~a:(Bytes.unsafe_to_string self.buf.bs) - ~a_start:(!i + 4) ~b:self.boundary + ~a_start:(!i + 2) ~b:self.boundary ~len:(String.length self.boundary) then raise_notrace (Found_boundary !i); @@ -95,7 +93,7 @@ let rec read_chunk_ (self : st) buf i_buf len : chunk = Read n_read with | Found_boundary 0 -> - shift_left_ self.buf (4 + String.length self.boundary); + shift_left_ self.buf (2 + String.length self.boundary); Delim | Found_boundary n -> let n_read = min n len in @@ -191,7 +189,6 @@ and parse_headers_rec (self : st) acc : Headers.t = ) | i -> let line = Bytes.sub_string self.buf_out.bs 0 i in - Printf.eprintf "parse header line %S\n%!" line; shift_left_ self.buf_out (i + 2); if line = "" then List.rev acc From 099777b593f8a3fa72ecce5c584ca0ab14430a38 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Mon, 2 Dec 2024 15:49:08 -0500 Subject: [PATCH 14/17] test --- tests/multipart_form/t_chunk.ml | 12 ++++-------- tests/multipart_form/t_parse.ml | 13 +++---------- 2 files changed, 7 insertions(+), 18 deletions(-) diff --git a/tests/multipart_form/t_chunk.ml b/tests/multipart_form/t_chunk.ml index 346803ed..3d46052a 100644 --- a/tests/multipart_form/t_chunk.ml +++ b/tests/multipart_form/t_chunk.ml @@ -38,17 +38,13 @@ let test input_str = let () = pf "T1\n"; test - "hello\r\n\ - --YOLO\n\ + "hello--YOLO\n\ \ world\n\ - \ what is the meaning of\r\n\ - --YOLOthis??\r\n\ - --YOLOok ok ok\r\n\ - --YOLO"; + \ what is the meaning of--YOLOthis??--YOLOok ok ok--YOLO"; pf "T2\n"; - test "\r\n--YOLO\r\n--YOLOah bon\r\n--YOLOaight\r\n--YOLO\r\n--YOLO"; + test "--YOLO--YOLOah bon--YOLOaight--YOLO--YOLO"; pf "T3\n"; test - (spf "\r\n--YOLO%s\r\n--YOLO\r\n--YOLO%s\r\n--YOLO%s" (String.make 400 'a') + (spf "--YOLO%s--YOLO--YOLO%s--YOLO%s" (String.make 400 'a') (String.make 512 'b') (String.make 400 'c')); () diff --git a/tests/multipart_form/t_parse.ml b/tests/multipart_form/t_parse.ml index 955968b5..4eb27b9c 100644 --- a/tests/multipart_form/t_parse.ml +++ b/tests/multipart_form/t_parse.ml @@ -41,34 +41,27 @@ let test input_str = let () = pf "T1\n"; test - "\r\n\ - --YOLO\r\n\ + "--YOLO\r\n\ some-super-cool: header here\r\n\ ohlook: here\r\n\ \r\n\ and now for the b-o-d-y 👏\n\ - \r\n\ --YOLO\r\n\ more: headers\r\n\ \r\n\ and another body\r\n\ - \r\n\ --YOLO--"; pf "T1\n"; test (spf - "\r\n\ - --YOLO\r\n\ + "--YOLO\r\n\ some-super-cool: header here\r\n\ ohlook: here\r\n\ \r\n\ and now for the bigger body:\n\ %s\n\ - \r\n\ --YOLO\r\n\ more: headers\r\n\ \r\n\ - and another body\r\n\ - --YOLO--" - (String.make 500 'a')); + and another body--YOLO--" (String.make 500 'a')); () From 21c0f7f25d553e91619d586a52983a2a592de943 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Mon, 2 Dec 2024 15:58:16 -0500 Subject: [PATCH 15/17] feat: require \r\n before all boundaries but the first --- .../tiny_httpd_multipart_form_data.ml | 31 ++++++++++++++++--- tests/multipart_form/t_chunk.ml | 9 ++++-- tests/multipart_form/t_parse.ml | 7 ++++- 3 files changed, 39 insertions(+), 8 deletions(-) diff --git a/src/multipart_form/tiny_httpd_multipart_form_data.ml b/src/multipart_form/tiny_httpd_multipart_form_data.ml index 9807ff0f..ce4e8326 100644 --- a/src/multipart_form/tiny_httpd_multipart_form_data.ml +++ b/src/multipart_form/tiny_httpd_multipart_form_data.ml @@ -27,6 +27,7 @@ type st = { boundary: string; ic: Iostream.In.t; buf: buf; (** Used to split on the boundary *) + mutable first: bool; (** Are we parsing the first boundary? *) mutable eof_split: bool; buf_out: buf; (** Used to return output slices *) mutable st_out: out_state; @@ -37,6 +38,7 @@ let create ?(buf_size = 64 * 1024) ?(out_buf_size = 8 * 1024) ~boundary ic : st let ic = (ic : #Iostream.In.t :> Iostream.In.t) in { boundary; + first = true; ic; buf = { bs = Bytes.create buf_size; len = 0 }; eof_split = false; @@ -46,7 +48,14 @@ let create ?(buf_size = 64 * 1024) ?(out_buf_size = 8 * 1024) ~boundary ic : st type chunk = Delim | Eof | Read of int -let[@inline] min_len_ (self : st) : int = 2 + String.length self.boundary +let[@inline] prefix_size_ (self : st) : int = + if self.first then + 2 + else + 4 + +let[@inline] min_len_ (self : st) : int = + prefix_size_ self + String.length self.boundary exception Found_boundary of int @@ -74,15 +83,27 @@ let rec read_chunk_ (self : st) buf i_buf len : chunk = ) else ( try let i = ref 0 in - let end_pos = min len self.buf.len - 2 - String.length self.boundary in + let end_pos = + min len self.buf.len - prefix_size_ self - String.length self.boundary + in while !i <= end_pos do if - Bytes.unsafe_get self.buf.bs !i = '-' + self.first + && Bytes.unsafe_get self.buf.bs !i = '-' && Bytes.unsafe_get self.buf.bs (!i + 1) = '-' && Utils_.string_eq ~a:(Bytes.unsafe_to_string self.buf.bs) ~a_start:(!i + 2) ~b:self.boundary ~len:(String.length self.boundary) + || (not self.first) + && Bytes.unsafe_get self.buf.bs !i = '\r' + && Bytes.unsafe_get self.buf.bs (!i + 1) = '\n' + && Bytes.unsafe_get self.buf.bs (!i + 2) = '-' + && Bytes.unsafe_get self.buf.bs (!i + 3) = '-' + && Utils_.string_eq + ~a:(Bytes.unsafe_to_string self.buf.bs) + ~a_start:(!i + 4) ~b:self.boundary + ~len:(String.length self.boundary) then raise_notrace (Found_boundary !i); incr i @@ -93,7 +114,8 @@ let rec read_chunk_ (self : st) buf i_buf len : chunk = Read n_read with | Found_boundary 0 -> - shift_left_ self.buf (2 + String.length self.boundary); + shift_left_ self.buf (prefix_size_ self + String.length self.boundary); + self.first <- false; Delim | Found_boundary n -> let n_read = min n len in @@ -189,6 +211,7 @@ and parse_headers_rec (self : st) acc : Headers.t = ) | i -> let line = Bytes.sub_string self.buf_out.bs 0 i in + Printf.eprintf "parse header line %S\n%!" line; shift_left_ self.buf_out (i + 2); if line = "" then List.rev acc diff --git a/tests/multipart_form/t_chunk.ml b/tests/multipart_form/t_chunk.ml index 3d46052a..897e0b60 100644 --- a/tests/multipart_form/t_chunk.ml +++ b/tests/multipart_form/t_chunk.ml @@ -40,11 +40,14 @@ let () = test "hello--YOLO\n\ \ world\n\ - \ what is the meaning of--YOLOthis??--YOLOok ok ok--YOLO"; + \ what is the meaning of\r\n\ + --YOLOthis??\r\n\ + --YOLOok ok ok\r\n\ + --YOLO"; pf "T2\n"; - test "--YOLO--YOLOah bon--YOLOaight--YOLO--YOLO"; + test "--YOLO\r\n--YOLOah bon\r\n--YOLOaight\r\n--YOLO\r\n--YOLO"; pf "T3\n"; test - (spf "--YOLO%s--YOLO--YOLO%s--YOLO%s" (String.make 400 'a') + (spf "--YOLO%s\r\n--YOLO\r\n--YOLO%s\r\n--YOLO%s" (String.make 400 'a') (String.make 512 'b') (String.make 400 'c')); () diff --git a/tests/multipart_form/t_parse.ml b/tests/multipart_form/t_parse.ml index 4eb27b9c..19225b78 100644 --- a/tests/multipart_form/t_parse.ml +++ b/tests/multipart_form/t_parse.ml @@ -46,10 +46,12 @@ let () = ohlook: here\r\n\ \r\n\ and now for the b-o-d-y 👏\n\ + \r\n\ --YOLO\r\n\ more: headers\r\n\ \r\n\ and another body\r\n\ + \r\n\ --YOLO--"; pf "T1\n"; test @@ -60,8 +62,11 @@ let () = \r\n\ and now for the bigger body:\n\ %s\n\ + \r\n\ --YOLO\r\n\ more: headers\r\n\ \r\n\ - and another body--YOLO--" (String.make 500 'a')); + and another body\r\n\ + --YOLO--" + (String.make 500 'a')); () From 98755431926cfc55f10f9ba83cc91a34b088e7ef Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Tue, 3 Dec 2024 09:44:16 -0500 Subject: [PATCH 16/17] remove debug line --- src/multipart_form/tiny_httpd_multipart_form_data.ml | 1 - 1 file changed, 1 deletion(-) diff --git a/src/multipart_form/tiny_httpd_multipart_form_data.ml b/src/multipart_form/tiny_httpd_multipart_form_data.ml index ce4e8326..229e8537 100644 --- a/src/multipart_form/tiny_httpd_multipart_form_data.ml +++ b/src/multipart_form/tiny_httpd_multipart_form_data.ml @@ -211,7 +211,6 @@ and parse_headers_rec (self : st) acc : Headers.t = ) | i -> let line = Bytes.sub_string self.buf_out.bs 0 i in - Printf.eprintf "parse header line %S\n%!" line; shift_left_ self.buf_out (i + 2); if line = "" then List.rev acc From 731dd7de51279ce8685719bfdd2bc366dc7589de Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Tue, 3 Dec 2024 10:13:33 -0500 Subject: [PATCH 17/17] add a form to echo.ml for manual testing --- examples/dune | 2 +- examples/echo.ml | 83 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 84 insertions(+), 1 deletion(-) diff --git a/examples/dune b/examples/dune index d2c19915..08d06886 100644 --- a/examples/dune +++ b/examples/dune @@ -12,7 +12,7 @@ (name echo) (flags :standard -warn-error -a+8) (modules echo vfs) - (libraries tiny_httpd logs tiny_httpd_camlzip)) + (libraries tiny_httpd logs tiny_httpd_camlzip tiny_httpd.multipart-form-data)) (executable (name writer) diff --git a/examples/echo.ml b/examples/echo.ml index f3d0f2af..af133187 100644 --- a/examples/echo.ml +++ b/examples/echo.ml @@ -1,5 +1,6 @@ open Tiny_httpd_core module Log = Tiny_httpd.Log +module MFD = Tiny_httpd_multipart_form_data let now_ = Unix.gettimeofday @@ -78,6 +79,58 @@ let setup_logging () = Logs.set_reporter @@ Logs.format_reporter (); Logs.set_level ~all:true (Some Logs.Debug) +let setup_upload server : unit = + Server.add_route_handler_stream ~meth:`POST server + Route.(exact "upload" @/ return) + (fun req -> + let (`boundary boundary) = + match MFD.parse_content_type req.headers with + | Some b -> b + | None -> Response.fail_raise ~code:400 "no boundary found" + in + + let st = MFD.create ~boundary req.body in + let tbl = Hashtbl.create 16 in + let cur = ref "" in + let cur_kind = ref "" in + let buf = Buffer.create 16 in + let rec loop () = + match MFD.next st with + | End_of_input -> + if !cur <> "" then + Hashtbl.add tbl !cur (!cur_kind, Buffer.contents buf) + | Part headers -> + if !cur <> "" then + Hashtbl.add tbl !cur (!cur_kind, Buffer.contents buf); + (match MFD.Content_disposition.parse headers with + | Some { kind; name = Some name; filename = _ } -> + cur := name; + cur_kind := kind; + Buffer.clear buf; + loop () + | _ -> Response.fail_raise ~code:400 "content disposition missing") + | Read sl -> + Buffer.add_subbytes buf sl.bytes sl.off sl.len; + loop () + in + loop (); + + let open Tiny_httpd_html in + let data = + Hashtbl.fold + (fun name (kind, data) acc -> + Printf.sprintf "%S (kind: %S): %S" name kind data :: acc) + tbl [] + in + let html = + body [] + [ + pre [] + [ txt (Printf.sprintf "{\n%s\n}" @@ String.concat "\n" data) ]; + ] + in + Response.make_string ~code:201 @@ Ok (to_string_top html)) + let () = let port_ = ref 8080 in let j = ref 32 in @@ -198,6 +251,8 @@ let () = ~dir_behavior:Tiny_httpd.Dir.Index_or_lists ()) ~vfs:Vfs.vfs ~prefix:"vfs"; + setup_upload server; + (* main page *) Server.add_route_handler server Route.(return) @@ -267,6 +322,34 @@ let () = txt " (POST) to log out"; ]; ]; + li [] + [ + form + [ + A.action "/upload"; + A.enctype "multipart/form-data"; + A.target "_self"; + A.method_ "POST"; + ] + [ + label [] [ txt "my beautiful form" ]; + input [ A.type_ "file"; A.name "file1" ]; + input [ A.type_ "file"; A.name "file2" ]; + input + [ + A.type_ "text"; + A.name "a"; + A.placeholder "text A"; + ]; + input + [ + A.type_ "text"; + A.name "b"; + A.placeholder "text B"; + ]; + input [ A.type_ "submit" ]; + ]; + ]; ]; ]; ]