From e6736e7cb52be5d05fb5bab4963bbe08ad70d77a Mon Sep 17 00:00:00 2001
From: Kevin Schneider
Date: Tue, 12 Sep 2023 19:51:58 +0200
Subject: [PATCH] Add temporary changes from https://github.com/nfdi4plants/ARCtrl/pull/189

---
 src/arc-export/ARCExtension.fs            | 107 ++++++++++++++++++++++
 src/arc-export/ArcSummaryMarkdown.fs      |  29 +-----
 src/arc-export/FileSystemTreeExtension.fs |  55 +++++++++++
 src/arc-export/FileTree.fs                |   4 -
 src/arc-export/Program.fs                 |   6 +-
 src/arc-export/arc-export.fsproj          |   3 +-
 6 files changed, 172 insertions(+), 32 deletions(-)
 create mode 100644 src/arc-export/ARCExtension.fs
 create mode 100644 src/arc-export/FileSystemTreeExtension.fs
 delete mode 100644 src/arc-export/FileTree.fs

diff --git a/src/arc-export/ARCExtension.fs b/src/arc-export/ARCExtension.fs
new file mode 100644
index 0000000..67fb3f6
--- /dev/null
+++ b/src/arc-export/ARCExtension.fs
@@ -0,0 +1,107 @@
+module ARCExtension
+
+open FileSystemTreeExtension
+
+open ARCtrl
+open ARCtrl.FileSystem
+
+
+//temporarily add implementations from https://github.com/nfdi4plants/ARCtrl/pull/189
+type ARC with
+    /// <summary>
+    /// Returns the FileSystemTree of the ARC with only the registered files and folders included: the root investigation file and README, the ISA file and README of every study and assay, every path that can be constructed from a table cell value, and every path starting with "workflows" or "runs".
+    /// </summary>
+    /// <param name="IgnoreHidden">Whether or not to ignore hidden files and folders starting with '.'. If true, no hidden files are included in the result. (default: true)</param>
+    member this.GetRegisteredPayload(?IgnoreHidden:bool) =
+
+        let isaCopy = this.ISA |> Option.map (fun i -> i.Copy()) // not sure if needed, but let's be safe
+
+        let registeredStudies =
+            isaCopy
+            |> Option.map (fun isa -> isa.Studies.ToArray()) // to-do: isa.RegisteredStudies
+            |> Option.defaultValue [||]
+
+        let registeredAssays =
+            registeredStudies
+            |> Array.map (fun s -> s.Assays.ToArray()) // to-do: s.RegisteredAssays
+            |> Array.concat
+
+        let includeRootFiles : Set<string> =
+            set [
+                "isa.investigation.xlsx"
+                "README.md"
+            ]
+
+        let includeStudyFiles =
+            registeredStudies
+            |> Array.map (fun s ->
+                let studyFoldername = $"studies/{s.Identifier}"
+
+                set [
+                    yield $"{studyFoldername}/isa.study.xlsx"
+                    yield $"{studyFoldername}/README.md"
+
+                    //allow any path constructed from the cell values of every table of the study (a study without tables contributes none). there may be occasions where this includes wrong files, but it's good enough for now.
+                    for (kv) in s.Tables |> Seq.collect (fun t -> t.Values) do
+                        yield kv.Value.AsFreeText // from arc root. NOTE(review): AsFreeText presumably expects a free-text cell; confirm it cannot fail for other cell kinds
+                        yield $"{studyFoldername}/resources/{kv.Value.AsFreeText}" // from study root > resources
+                        yield $"{studyFoldername}/protocols/{kv.Value.AsFreeText}" // from study root > protocols
+                ]
+            )
+            |> Set.unionMany
+
+        let includeAssayFiles =
+            registeredAssays
+            |> Array.map (fun a ->
+                let assayFoldername = $"assays/{a.Identifier}"
+
+                set [
+                    yield $"{assayFoldername}/isa.assay.xlsx"
+                    yield $"{assayFoldername}/README.md"
+
+                    //allow any path constructed from the cell values of every table of the assay (an assay without tables contributes none). there may be occasions where this includes wrong files, but it's good enough for now.
+                    for (kv) in a.Tables |> Seq.collect (fun t -> t.Values) do
+                        yield kv.Value.AsFreeText // from arc root
+                        yield $"{assayFoldername}/dataset/{kv.Value.AsFreeText}" // from assay root > dataset
+                        yield $"{assayFoldername}/protocols/{kv.Value.AsFreeText}" // from assay root > protocols
+                ]
+            )
+            |> Set.unionMany
+
+
+        let includeFiles = Set.unionMany [includeRootFiles; includeStudyFiles; includeAssayFiles]
+
+        let ignoreHidden = defaultArg IgnoreHidden true
+        let fsCopy = this.FileSystem.Copy() // not sure if needed, but let's be safe
+
+        fsCopy.Tree
+        |> FileSystemTree.toFilePaths()
+        |> Array.filter (fun p ->
+            p.StartsWith("workflows") // plain prefix match, no lookup in includeFiles
+            || p.StartsWith("runs")
+            || includeFiles.Contains(p)
+        )
+        |> FileSystemTree.fromFilePaths
+        |> fun tree -> if ignoreHidden then tree |> FileSystemTree.filterFiles (fun n -> not (n.StartsWith("."))) else Some tree
+        |> Option.bind (fun tree -> if ignoreHidden then tree |> FileSystemTree.filterFolders (fun n -> not (n.StartsWith("."))) else Some tree)
+        |> Option.defaultValue (FileSystemTree.fromFilePaths [||]) // fall back to a tree built from no paths when the filters return None
+
+    ///
+    /// Returns the FileSystemTree of the ARC with only the files and folders included that are considered additional payload.
+    ///
+    /// Whether or not to ignore hidden files and folders starting with '.'. If true, no hidden files are included in the result.
(default: true) + + member this.GetAdditionalPayload(?IgnoreHidden:bool) = + let ignoreHidden = defaultArg IgnoreHidden true + let registeredPayload = + this.GetRegisteredPayload() // NOTE(review): called without IgnoreHidden, so the registered set is always computed with hidden entries ignored, even when IgnoreHidden=false is passed to this member; confirm this is intended + |> FileSystemTree.toFilePaths() + |> set + + this.FileSystem.Copy().Tree + |> FileSystemTree.toFilePaths() + |> Array.filter (fun p -> not (registeredPayload.Contains(p))) // additional payload is every path that is not part of the registered payload + |> FileSystemTree.fromFilePaths + |> fun tree -> if ignoreHidden then tree |> FileSystemTree.filterFiles (fun n -> not (n.StartsWith("."))) else Some tree + |> Option.bind (fun tree -> if ignoreHidden then tree |> FileSystemTree.filterFolders (fun n -> not (n.StartsWith("."))) else Some tree) + |> Option.defaultValue (FileSystemTree.fromFilePaths [||]) \ No newline at end of file diff --git a/src/arc-export/ArcSummaryMarkdown.fs b/src/arc-export/ArcSummaryMarkdown.fs index 1408d5b..f1298b0 100644 --- a/src/arc-export/ArcSummaryMarkdown.fs +++ b/src/arc-export/ArcSummaryMarkdown.fs @@ -1,12 +1,14 @@ module ArcSummaryMarkdown +open FileSystemTreeExtension + open ARCtrl open ARCtrl.FileSystem open ARCtrl.NET let [] MARKDOWN_TEMPLATE = """## [Data set] [[ARC_TITLE]] -### File contents: +### Registered ARC content: [[FILE_TREE]] """ @@ -16,25 +18,6 @@ type ARCtrl.FileSystem.FileSystemTree with static member createItemString (level:int) (item: string) = $"""{String.replicate level " "}- {item}""" - - member this.FilterNodes (predicate: string -> bool) = - let rec loop (parent: FileSystemTree) = - match parent with - | File n -> - if predicate n then Some (FileSystemTree.File n) else None - | Folder (n, children) -> - if predicate n then - let filteredChildren = children |> Array.choose loop - if Array.isEmpty filteredChildren then - None - else - Some (FileSystemTree.Folder (n,filteredChildren)) - else - None - loop this - - static member filterNodes (predicate: string -> bool) = - fun (tree: FileSystemTree) -> tree.FilterNodes predicate static member toMarkdownTOC (tree: FileSystemTree) = let rec loop (level:int) 
(acc:string list) (fs: FileSystemTree) = @@ -54,12 +37,6 @@ type ARCtrl.FileSystem.FileSystemTree with finalAccum tree - |> FileSystemTree.filterNodes( - fun item -> - let predicate = not (item.StartsWith(".")) - predicate - ) - |> Option.get |> loop 0 [] |> Seq.rev |> String.concat System.Environment.NewLine \ No newline at end of file diff --git a/src/arc-export/FileSystemTreeExtension.fs b/src/arc-export/FileSystemTreeExtension.fs new file mode 100644 index 0000000..c9bcced --- /dev/null +++ b/src/arc-export/FileSystemTreeExtension.fs @@ -0,0 +1,55 @@ +module FileSystemTreeExtension + +open ARCtrl +open ARCtrl.FileSystem + +//temporarily add implementations from https://github.com/nfdi4plants/ARCtrl/pull/189 +type ARCtrl.FileSystem.FileSystemTree with + + member this.FilterFiles (predicate: string -> bool) = // keeps only the files whose name satisfies the predicate; folders are always kept, even when all of their children are removed + let rec loop (parent: FileSystemTree) = + match parent with + | File n -> + if predicate n then Some (File n) else None + | Folder (n, children) -> + Folder (n, children |> Array.choose loop) + |> Some + + loop this + + static member filterFiles (predicate: string -> bool) = // curried form of FilterFiles for use in pipelines + fun (tree: FileSystemTree) -> tree.FilterFiles predicate + + member this.FilterFolders (predicate: string -> bool) = // drops every folder whose name fails the predicate together with its content; file names are never tested; None when the root folder itself is dropped + let rec loop (parent: FileSystemTree) = + match parent with + | File n -> Some (File n) + | Folder (n, children) -> + if predicate n then + Folder (n, children |> Array.choose loop) + |> Some + else + None + loop this + + static member filterFolders (predicate: string -> bool) = // curried form of FilterFolders for use in pipelines + fun (tree: FileSystemTree) -> tree.FilterFolders predicate + + member this.Filter (predicate: string -> bool) = // applies the predicate to file and folder names alike; a folder left without any child is dropped as well + let rec loop (parent: FileSystemTree) = + match parent with + | File n -> + if predicate n then Some (FileSystemTree.File n) else None + | Folder (n, children) -> + if predicate n then + let filteredChildren = children |> Array.choose loop + if Array.isEmpty filteredChildren then + None + else + Some (FileSystemTree.Folder (n,filteredChildren)) + else + None + loop this + + 
static member filter (predicate: string -> bool) = // curried form of Filter for use in pipelines + fun (tree: FileSystemTree) -> tree.Filter predicate \ No newline at end of file diff --git a/src/arc-export/FileTree.fs b/src/arc-export/FileTree.fs deleted file mode 100644 index fa273d4..0000000 --- a/src/arc-export/FileTree.fs +++ /dev/null @@ -1,4 +0,0 @@ -module FileTree - -//open ARCtrl.NET - diff --git a/src/arc-export/Program.fs b/src/arc-export/Program.fs index b043396..d957f32 100644 --- a/src/arc-export/Program.fs +++ b/src/arc-export/Program.fs @@ -4,6 +4,8 @@ open ARCtrl.FileSystem open ARCtrl.ISA open ARCtrl.NET open Argu +open FileSystemTreeExtension +open ARCExtension open ArcSummaryMarkdown open ARCtrl.NET.Contract @@ -48,10 +50,12 @@ try let jsonFile = Path.Combine(outPath,"arc.json") let mdfile = Path.Combine(outPath,"arc-summary.md") + let inv, mdContent = try let arc = loadARCCustom arcPath + let registeredPayload = arc.GetRegisteredPayload(IgnoreHidden = true) // the markdown summary below lists the registered payload only let inv = arc.ISA |> Option.get getAllFilePaths arcPath |> Seq.iter (printfn "%s") @@ -59,7 +63,7 @@ try inv, MARKDOWN_TEMPLATE .Replace("[[ARC_TITLE]]", inv.Title |> Option.defaultValue "Untitled ARC") - .Replace("[[FILE_TREE]]", FileSystemTree.toMarkdownTOC arc.FileSystem.Tree) + .Replace("[[FILE_TREE]]", FileSystemTree.toMarkdownTOC registeredPayload) with | err -> printfn "Could not read investigation, writing empty arc json." diff --git a/src/arc-export/arc-export.fsproj b/src/arc-export/arc-export.fsproj index d62a699..3d6142f 100644 --- a/src/arc-export/arc-export.fsproj +++ b/src/arc-export/arc-export.fsproj @@ -9,7 +9,8 @@ - + +