From ee1091f6ece4d248dc18fa5e045cbbd525c60d76 Mon Sep 17 00:00:00 2001 From: HLWeil Date: Thu, 6 Jun 2024 13:04:45 +0200 Subject: [PATCH 1/4] rework datamap to be represented as a row of records --- .github/workflows/manage-issues.yml | 1 - src/Core/ARCtrl.Core.fsproj | 1 + src/Core/Data.fs | 48 +++++++-- src/Core/DataContext.fs | 71 ++++++++++++ src/Core/Table/DataMap.fs | 160 +++++++++++++--------------- 5 files changed, 190 insertions(+), 91 deletions(-) create mode 100644 src/Core/DataContext.fs diff --git a/.github/workflows/manage-issues.yml b/.github/workflows/manage-issues.yml index da2d3dd0..34b85b6c 100644 --- a/.github/workflows/manage-issues.yml +++ b/.github/workflows/manage-issues.yml @@ -5,7 +5,6 @@ on: types: - opened - reopened - - transferred jobs: label_issues: diff --git a/src/Core/ARCtrl.Core.fsproj b/src/Core/ARCtrl.Core.fsproj index f81266b6..f37a0047 100644 --- a/src/Core/ARCtrl.Core.fsproj +++ b/src/Core/ARCtrl.Core.fsproj @@ -18,6 +18,7 @@ + diff --git a/src/Core/Data.fs b/src/Core/Data.fs index 18649f6d..5c028f45 100644 --- a/src/Core/Data.fs +++ b/src/Core/Data.fs @@ -4,11 +4,28 @@ open ARCtrl open ARCtrl.Helper open Fable.Core +module DataAux = + + let nameFromPathAndSelector (path : string) (selector : string) = + sprintf "%s#%s" path selector + + let pathAndSelectorFromName (name : string) = + let parts = name.Split('#') + if parts.Length = 2 then + parts.[0], Some parts.[1] + else + name, None + [] -type Data(?id,?name,?dataType,?format,?selectorFormat,?comments) = +type Data(?id,?name : string,?dataType,?format,?selectorFormat,?comments) = let mutable _id : URI option = id - let mutable _name : string option = name + let mutable _filePath,_selector = + match name with + | Some n -> + let p,s = DataAux.pathAndSelectorFromName n + Some p, s + | None -> None, None let mutable _dataType : DataFile option = dataType let mutable _format : string option = format let mutable _selectorFormat : URI option = selectorFormat @@ -19,8 +36,28 @@ type Data(?id,?name,?dataType,?format,?selectorFormat,?comments) = and set(id) = _id <- id member this.Name - with get() = _name - and set(name) = _name <- name + with get() = + match _filePath,_selector with + | Some p, Some s -> DataAux.nameFromPathAndSelector p s |> Some + | Some p, None -> p |> Some + | None, _ -> None + and set(name) = + match name with + | Some n -> + let p,s = DataAux.pathAndSelectorFromName n + _filePath <- Some p + _selector <- s + | None -> + _filePath <- None + _selector <- None + + member this.FilePath + with get() = _filePath + and set(filePath) = _filePath <- filePath + + member this.Selector + with get() = _selector + and set(selector) = _selector <- selector member this.DataType with get() = _dataType @@ -36,8 +73,7 @@ type Data(?id,?name,?dataType,?format,?selectorFormat,?comments) = member this.Comments with get() = _comments - and set(comments) = _comments <- comments - + and set(comments) = _comments <- comments static member make id name dataType format selectorFormat comments = Data(?id=id,?name=name,?dataType=dataType,?format=format,?selectorFormat=selectorFormat,?comments=comments) diff --git a/src/Core/DataContext.fs b/src/Core/DataContext.fs new file mode 100644 index 00000000..f5b20279 --- /dev/null +++ b/src/Core/DataContext.fs @@ -0,0 +1,71 @@ +namespace ARCtrl + +open ARCtrl +open ARCtrl.Helper +open Fable.Core + +type DataContext(?id,?name : string,?dataType,?format,?selectorFormat, ?explication, ?unit, ?objectType, ?description, ?generatedBy, ?comments) = + + inherit Data(?id = id,?name = name, ?dataType = dataType, ?format = format, ?selectorFormat = selectorFormat, ?comments = comments) + + let mutable _explication : OntologyAnnotation option = explication + let mutable _unit : OntologyAnnotation option = unit + let mutable _objectType : OntologyAnnotation option = objectType + let mutable _description : string option = description + let mutable _generatedBy : string option = generatedBy + + member this.Explication + with get() = _explication + and set(explication) = _explication <- explication + + member this.Unit + with get() = _unit + and set(unit) = _unit <- unit + + member this.ObjectType + with get() = _objectType + and set(objectType) = _objectType <- objectType + + member this.Description + with get() = _description + and set(description) = _description <- description + + member this.GeneratedBy + with get() = _generatedBy + and set(generatedBy) = _generatedBy <- generatedBy + + + member this.Copy() = + let copy = new DataContext() + copy.ID <- this.ID + copy.Name <- this.Name + copy.DataType <- this.DataType + copy.Format <- this.Format + copy.SelectorFormat <- this.SelectorFormat + copy.Explication <- this.Explication + copy.Unit <- this.Unit + copy.ObjectType <- this.ObjectType + copy.Description <- this.Description + copy.GeneratedBy <- this.GeneratedBy + copy.Comments <- this.Comments + copy + + override this.GetHashCode() = + [| + HashCodes.boxHashOption this.ID + HashCodes.boxHashOption this.Name + HashCodes.boxHashOption this.DataType + HashCodes.boxHashOption this.Format + HashCodes.boxHashOption this.SelectorFormat + HashCodes.boxHashSeq this.Comments + HashCodes.boxHashOption this.Explication + HashCodes.boxHashOption this.Unit + HashCodes.boxHashOption this.ObjectType + HashCodes.boxHashOption this.Description + HashCodes.boxHashOption this.GeneratedBy + |] + |> HashCodes.boxHashArray + |> fun x -> x :?> int + + override this.Equals(obj) = + HashCodes.hash this = HashCodes.hash obj \ No newline at end of file diff --git a/src/Core/Table/DataMap.fs b/src/Core/Table/DataMap.fs index 3b7fedb3..9dd417c2 100644 --- a/src/Core/Table/DataMap.fs +++ b/src/Core/Table/DataMap.fs @@ -8,131 +8,123 @@ open Fable.Core [] module DataMapAux = - - [] - let dataMapName = "DataMap" - - let dataHeader = CompositeHeader.Input IOType.Data [] - let dataShortHand = "Data" - - let explication = OntologyAnnotation("Clarification","NCIT","http://purl.obolibrary.org/obo/NCIT_C94778") - + let dataShortHand = "Data" + [] let explicationShortHand = "Explication" - let explicationHeader = CompositeHeader.Parameter explication - - let unit = OntologyAnnotation("Unit","UO","http://purl.obolibrary.org/obo/UO_0000000") - [] let unitShortHand = "Unit" - let unitHeader = CompositeHeader.Parameter unit - - let objectType = OntologyAnnotation("Data Type","NCIT","http://purl.obolibrary.org/obo/NCIT_C42645") - - let objectTypeHeader = CompositeHeader.Parameter objectType - [] let objectTypeShortHand = "Object Type" - let descriptionHeader = CompositeHeader.FreeText "Description" - [] let descriptionShortHand = "Description" - let generatedByHeader = CompositeHeader.FreeText "Generated By" - [] let generatedByShortHand = "Generated By" - let allowedHeaders = - [dataHeader; explicationHeader; unitHeader; objectTypeHeader; descriptionHeader; generatedByHeader] - - let validate (headers : ResizeArray) (values : System.Collections.Generic.Dictionary) (raiseException : bool) = - let headersAreValid = - headers - |> Seq.exists (fun h -> - let hasForeignHeader = - not (allowedHeaders |> List.exists (fun ah -> ah = h)) - if raiseException && hasForeignHeader then - failwithf "Header %O is not allowed in DataMap" h - hasForeignHeader - ) - let tableIsValid = ArcTableAux.SanityChecks.validate headers values raiseException - headersAreValid && tableIsValid - -type DataMap(headers: ResizeArray, values: System.Collections.Generic.Dictionary) = - - let _ = DataMapAux.validate headers values true + let getOntologyColumn (f : DataContext -> OntologyAnnotation option) (dataContexts : ResizeArray) = + dataContexts + |> Seq.map (fun dc -> + match f dc with + | Some s -> CompositeCell.Term(s) + | None -> CompositeCell.emptyTerm + ) + |> ResizeArray + + let getStringColumn (f : DataContext -> string option) (dataContexts : ResizeArray) = + dataContexts + |> Seq.map (fun dc -> + match f dc with + | Some s -> CompositeCell.FreeText(s) + | None -> CompositeCell.emptyFreeText + ) + |> ResizeArray + + let setOntologyColumn (f : DataContext -> OntologyAnnotation option -> unit) (column : CompositeCell []) (dataContexts : ResizeArray) = + column + |> Seq.iteri (fun i cell -> + match cell with + | CompositeCell.Term s -> f (dataContexts.[i]) (Some s) + | _ -> () + ) - let table = ArcTable(DataMapAux.dataMapName, headers, values) - let mutable staticHash = 0 + let setStringColumn (f : DataContext -> string option -> unit) (column : CompositeCell []) (dataContexts : ResizeArray) = + column + |> Seq.iteri (fun i cell -> + match cell with + | CompositeCell.FreeText s -> f (dataContexts.[i]) (Some s) + | _ -> () + ) - member this.Headers = table.Headers - member this.Values = table.Values - member this.StaticHash with get() = staticHash and set(value) = staticHash <- value + module SanityChecks = + + let rowIndexInBoundaries (row : int) (dataContexts : ResizeArray) = + if row < 0 then + failwith "Row index must be greater or equal to 0." + if row >= dataContexts.Count then + failwith "Row index must be less than the number of rows." - static member init() = DataMap(ResizeArray(),Dictionary()) + let lengthOfNewColumn (newColumn : CompositeCell []) (dataContexts : ResizeArray) = + if newColumn.Length <> dataContexts.Count then + failwith "Length of new column does not match length of data contexts." - member this.AddColumns(columns : CompositeColumn [], ?skipFillMissing : bool) = - columns |> Array.iter (fun c -> c.Validate(true) |> ignore) - table.AddColumns(columns, ?skipFillMissing = skipFillMissing) - DataMapAux.validate table.Headers table.Values true |> ignore +type DataMap(dataContexts : ResizeArray) = + + let mutable staticHash = 0 + let mutable dataContexts = dataContexts - static member addColumns (columns : CompositeColumn [], ?skipFillMissing : bool) = - fun (dm : DataMap) -> - let dm : DataMap = dm.Copy() - dm.AddColumns(columns, ?skipFillMissing = skipFillMissing) - dm + member this.StaticHash with get() = staticHash and set(value) = staticHash <- value - member this.Table = table + static member init() = DataMap(ResizeArray()) - member this.TryGetCellAt (row: int, column: int) = table.TryGetCellAt(row, column) - member this.GetExplicationColumn() = - table.GetColumnByHeader(DataMapAux.explicationHeader) + member this.GetExplicationColumn() = + DataMapAux.getOntologyColumn (fun dc -> dc.Explication) dataContexts - member this.AddExplicationColumn(cells : CompositeCell []) = - table.AddColumn(DataMapAux.explicationHeader, cells) + member this.SetExplicationColumn(cells : CompositeCell []) = + DataMapAux.setOntologyColumn (fun dc oa -> dc.Explication <- oa) cells dataContexts member this.GetUnitColumn() = - table.GetColumnByHeader(DataMapAux.unitHeader) + DataMapAux.getOntologyColumn (fun dc -> dc.Unit) dataContexts - member this.AddUnitColumn(cells : CompositeCell []) = - table.AddColumn(DataMapAux.unitHeader, cells) + member this.SetUnitColumn(cells : CompositeCell []) = + DataMapAux.setOntologyColumn (fun dc oa -> dc.Unit <- oa) cells dataContexts - member this.GetDataTypeColumn() = - table.GetColumnByHeader(DataMapAux.objectTypeHeader) + member this.GetObjectTypeColumn() = + DataMapAux.getOntologyColumn (fun dc -> dc.ObjectType) dataContexts - member this.AddDataTypeColumn(cells : CompositeCell []) = - table.AddColumn(DataMapAux.objectTypeHeader, cells) + member this.SetDataTypeColumn(cells : CompositeCell []) = + DataMapAux.setOntologyColumn (fun dc oa -> dc.ObjectType <- oa) cells dataContexts member this.GetDescriptionColumn() = - table.GetColumnByHeader(DataMapAux.descriptionHeader) - - member this.AddDescriptionColumn(cells : CompositeCell []) = - table.AddColumn(DataMapAux.descriptionHeader, cells) + DataMapAux.getStringColumn (fun dc -> dc.Description) dataContexts + member this.SetDescriptionColumn(cells : CompositeCell []) = + DataMapAux.setStringColumn (fun dc s -> dc.Description <- s) cells dataContexts - member this.GetRow(row: int, ?SkipValidation) = table.GetRow(row,?SkipValidation = SkipValidation) + member this.GetDataContext(row: int, ?SkipValidation) = + DataMapAux.SanityChecks.rowIndexInBoundaries row dataContexts + dataContexts.Item(row) - static member getRow(row: int, ?SkipValidation) = - fun (dm : DataMap) -> dm.GetRow(row,?SkipValidation = SkipValidation) + static member getDataContext(row: int, ?SkipValidation) = + fun (dm : DataMap) -> dm.GetDataContext(row,?SkipValidation = SkipValidation) member this.Copy() = DataMap( - ResizeArray(this.Headers), - Dictionary(this.Values) + dataContexts + |> Seq.map (fun dc -> dc.Copy()) + |> ResizeArray ) override this.Equals(obj) = - match obj with - | :? DataMap as dm -> - this.Table.Equals(dm.Table) - | _ -> false + HashCodes.hash this = HashCodes.hash obj override this.GetHashCode() = - this.Table.GetHashCode() \ No newline at end of file + dataContexts + |> HashCodes.boxHashSeq + |> fun x -> x :?> int \ No newline at end of file From 22618830d79537c2f3cc10e2f69567432b0e8594 Mon Sep 17 00:00:00 2001 From: HLWeil Date: Thu, 6 Jun 2024 17:03:54 +0200 Subject: [PATCH 2/4] adjust spreadsheet parsing to datamap datamodel change --- .../AnnotationTable/CompositeCell.fs | 9 - src/Spreadsheet/DataMapTable/DataMapColumn.fs | 94 +++++++---- src/Spreadsheet/DataMapTable/DataMapHeader.fs | 155 +++++++++++++----- src/Spreadsheet/DataMapTable/DataMapTable.fs | 21 +-- 4 files changed, 180 insertions(+), 99 deletions(-) diff --git a/src/Spreadsheet/AnnotationTable/CompositeCell.fs b/src/Spreadsheet/AnnotationTable/CompositeCell.fs index 74d0e294..ace84f8d 100644 --- a/src/Spreadsheet/AnnotationTable/CompositeCell.fs +++ b/src/Spreadsheet/AnnotationTable/CompositeCell.fs @@ -4,15 +4,6 @@ open ARCtrl open ARCtrl.Helper open FsSpreadsheet -//let fromFsCells (cells : list) : CompositeCell = -// let cellValues = cells |> List.map (fun c -> c.ValueAsString()) -// match cellValues with -// | [v] -> CompositeCell.createFreeText v -// | [v1;v2;v3] -> CompositeCell.createTermFromString(v1,v2,v3) -// | [v1;v2;v3;v4] -> CompositeCell.createUnitizedFromString(v1,v2,v3,v4) -// | _ -> -// failwithf "Dafuq" - let termFromFsCells (tsrCol : int option) (tanCol : int option ) (cells : list) : CompositeCell= let cellValues = cells |> List.map (fun c -> c.ValueAsString()) let tan = Option.map (fun i -> cellValues.[i]) tanCol diff --git a/src/Spreadsheet/DataMapTable/DataMapColumn.fs b/src/Spreadsheet/DataMapTable/DataMapColumn.fs index 84b97854..3c38eae1 100644 --- a/src/Spreadsheet/DataMapTable/DataMapColumn.fs +++ b/src/Spreadsheet/DataMapTable/DataMapColumn.fs @@ -3,46 +3,74 @@ open ARCtrl open ArcTable open FsSpreadsheet +open DataMapHeader.ActivePattern -let fromFsColumns (columns : list) : CompositeColumn = - let header, cellParser = +let setFromFsColumns (dc : ResizeArray) (columns : list) : ResizeArray = + let cellParser = columns |> List.map (fun c -> c.[1]) |> DataMapHeader.fromFsCells - let l = columns.[0].RangeAddress.LastAddress.RowNumber - let cells = - [| - for i = 2 to l do - columns - |> List.map (fun c -> c.[i]) - |> cellParser - |] - CompositeColumn.create(header,cells) - + for i = 2 to dc.Count - 1 do + columns + |> List.map (fun c -> c.[i]) + |> cellParser (dc.[i]) + |> ignore + dc -let toFsColumns (column : CompositeColumn) : FsCell list list = - let isTerm = column.Header.IsTermColumn - let isData = column.Header.IsDataColumn - let header = DataMapHeader.toFsCells column.Header - let cells = column.Cells |> Array.map (CompositeCell.toFsCells isTerm false) - if isTerm then +let toFsColumns (dc : ResizeArray) : FsCell list list = + let commentKeys = + dc + |> Seq.collect (fun dc -> dc.Comments |> Seq.map (fun c -> c.Name |> Option.defaultValue "")) + |> Seq.distinct + |> Seq.toList + let headers = + DataMapHeader.toFsCells commentKeys + let createTerm (oa : OntologyAnnotation option) = + match oa with + | Some oa -> + [ + oa.Name |> Option.defaultValue "" |> FsCell + oa.TermSourceREF |> Option.defaultValue "" |> FsCell + oa.TermAccessionNumber |> Option.defaultValue "" |> FsCell + ] + | None -> + [ + FsCell("") + FsCell("") + FsCell("") + ] + let createText (s : string option) = [ - [header.[0]; for i = 0 to column.Cells.Length - 1 do cells.[i].[0]] - [header.[1]; for i = 0 to column.Cells.Length - 1 do cells.[i].[1]] - [header.[2]; for i = 0 to column.Cells.Length - 1 do cells.[i].[2]] + FsCell(s |> Option.defaultValue "") ] - elif isData then - let hasFormat = column.Cells |> Seq.exists (fun c -> c.AsData.Format.IsSome) - let hasSelectorFormat = column.Cells |> Seq.exists (fun c -> c.AsData.SelectorFormat.IsSome) - + let createData (dc : DataContext) = [ - [header.[0]; for i = 0 to column.Cells.Length - 1 do cells.[i].[0]] - if hasFormat then - [header.[1]; for i = 0 to column.Cells.Length - 1 do cells.[i].[1]] - if hasSelectorFormat then - [header.[2]; for i = 0 to column.Cells.Length - 1 do cells.[i].[2]] + FsCell(dc.Name) + FsCell(dc.Format) + FsCell(dc.SelectorFormat) ] - else + let createRow (dc : DataContext) = [ - [header.[0]; for i = 0 to column.Cells.Length - 1 do cells.[i].[0]] - ] \ No newline at end of file + yield! (createData dc) + yield! (createTerm dc.Explication) + yield! (createTerm dc.Unit) + yield! (createTerm dc.ObjectType) + yield! (createText dc.Description) + yield! (createText dc.GeneratedBy) + yield! ( + commentKeys + |> List.map (fun key -> + dc.Comments + |> Seq.tryFind (fun c -> + Option.defaultValue "" c.Name = key) + |> Option.bind (fun c -> c.Value) + |> Option.defaultValue "" + |> FsCell + ) + ) + ] + [ + headers + for dc in dc do + createRow dc + ] \ No newline at end of file diff --git a/src/Spreadsheet/DataMapTable/DataMapHeader.fs b/src/Spreadsheet/DataMapTable/DataMapHeader.fs index 013f60f2..896cf41f 100644 --- a/src/Spreadsheet/DataMapTable/DataMapHeader.fs +++ b/src/Spreadsheet/DataMapTable/DataMapHeader.fs @@ -8,9 +8,25 @@ module ActivePattern = open Regex.ActivePatterns - let (|Term|_|) (categoryString : string) (categoryHeader : CompositeHeader) (cells : FsCell list) : (CompositeHeader*(FsCell list -> CompositeCell)) option = + let ontologyAnnotationFromFsCells (tsrCol : int option) (tanCol : int option) (cells : list) : OntologyAnnotation = + let cellValues = cells |> List.map (fun c -> c.ValueAsString()) + let tsr = Option.map (fun i -> cellValues.[i]) tsrCol + let tan = Option.map (fun i -> cellValues.[i]) tanCol + OntologyAnnotation(cellValues.[0],?tsr = tsr, ?tan = tan) + + let freeTextFromFsCells (cells : list) : string = + let cellValues = cells |> List.map (fun c -> c.ValueAsString()) + cellValues.[0] + + let dataFromFsCells (format : int option) (selectorFormat : int option) (cells : list) : Data = + let cellValues = cells |> List.map (fun c -> c.ValueAsString()) + let format = Option.map (fun i -> cellValues.[i]) format + let selectorFormat = Option.map (fun i -> cellValues.[i]) selectorFormat + Data(name = cellValues.[0],?format = format, ?selectorFormat = selectorFormat) + + let (|Term|_|) (categoryString : string) (cells : FsCell list) : ((FsCell list -> OntologyAnnotation)) option = let (|AC|_|) s = - if s = categoryString then Some categoryHeader else None + if s = categoryString then Some 1 else None let (|TSRColumnHeaderRaw|_|) (s : string) = if s.StartsWith("Term Source REF") then Some s else None let (|TANColumnHeaderRaw|_|) (s : string) = @@ -18,105 +34,154 @@ module ActivePattern = let cellValues = cells |> List.map (fun c -> c.ValueAsString()) match cellValues with | [AC header] -> - (header, CompositeCell.termFromFsCells None None) + (ontologyAnnotationFromFsCells None None) |> Some | [AC header; TSRColumnHeaderRaw _; TANColumnHeaderRaw _] -> - (header, CompositeCell.termFromFsCells (Some 1) (Some 2)) + (ontologyAnnotationFromFsCells (Some 1) (Some 2)) |> Some | [AC header; TANColumnHeaderRaw _; TSRColumnHeaderRaw _] -> - (header, CompositeCell.termFromFsCells (Some 2) (Some 1)) + (ontologyAnnotationFromFsCells (Some 2) (Some 1)) |> Some | _ -> None let (|Explication|_|) (cells : FsCell list) = match cells with - | Term DataMapAux.explicationShortHand DataMapAux.explicationHeader r -> - Some r + | Term DataMapAux.explicationShortHand r -> + (fun (dc : DataContext) (cells : FsCell list) -> + dc.Explication <- Some (r cells) + dc + ) + |> Some | _ -> None let (|Unit|_|) (cells : FsCell list) = match cells with - | Term DataMapAux.unitShortHand DataMapAux.unitHeader r -> - Some r + | Term DataMapAux.unitShortHand r -> + (fun (dc : DataContext) (cells : FsCell list) -> + dc.Unit <- Some (r cells) + dc + ) + |> Some | _ -> None let (|ObjectType|_|) (cells : FsCell list) = match cells with - | Term DataMapAux.objectTypeShortHand DataMapAux.objectTypeHeader r -> - Some r + | Term DataMapAux.objectTypeShortHand r -> + (fun (dc : DataContext) (cells : FsCell list) -> + dc.ObjectType <- Some (r cells) + dc + ) + |> Some | _ -> None let (|Description|_|) (cells : FsCell list) = let cellValues = cells |> List.map (fun c -> c.ValueAsString()) match cellValues with - | [DataMapAux.descriptionShortHand] -> Some(DataMapAux.descriptionHeader, CompositeCell.freeTextFromFsCells) + | [DataMapAux.descriptionShortHand] -> + (fun (dc : DataContext) (cells : FsCell list) -> + dc.Description <- Some (freeTextFromFsCells cells) + dc + ) + |> Some | _ -> None let (|GeneratedBy|_|) (cells : FsCell list) = let cellValues = cells |> List.map (fun c -> c.ValueAsString()) match cellValues with - | [DataMapAux.generatedByShortHand] -> Some(DataMapAux.generatedByHeader, CompositeCell.freeTextFromFsCells) + | [DataMapAux.generatedByShortHand] -> + (fun (dc : DataContext) (cells : FsCell list) -> + dc.GeneratedBy <- Some (freeTextFromFsCells cells) + dc + ) + |> Some | _ -> None let (|Data|_|) (cells : FsCell list) = let cellValues = cells |> List.map (fun c -> c.ValueAsString()) match cellValues with | DataMapAux.dataShortHand :: cols -> - - let format = cols |> List.tryFindIndex (fun s -> s.StartsWith("Data Format")) |> Option.map ((+) 1) - let selectorFormat = cols |> List.tryFindIndex (fun s -> s.StartsWith("Data Selector Format")) |> Option.map ((+) 1) - (CompositeHeader.Input (IOType.Data), CompositeCell.dataFromFsCells format selectorFormat) + (fun (dc : DataContext) (cells : FsCell list) -> + let format = cols |> List.tryFindIndex (fun s -> s.StartsWith("Data Format")) |> Option.map ((+) 1) + let selectorFormat = cols |> List.tryFindIndex (fun s -> s.StartsWith("Data Selector Format")) |> Option.map ((+) 1) + let d = dataFromFsCells format selectorFormat cells + dc.FilePath <- d.FilePath + dc.Selector <- d.Selector + dc.Format <- d.Format + dc.SelectorFormat <- d.SelectorFormat + dc + ) |> Some + + | _ -> None + let (|Comment|_|) (cells : FsCell list) = + let cellValues = cells |> List.map (fun c -> c.ValueAsString()) + match cellValues with + | [Comment key] -> + (fun (dc : DataContext) (cells : FsCell list) -> + let cellValues = cells |> List.map (fun c -> c.ValueAsString()) + let comment = cellValues.[0] + dc.Comments.Add(Comment.create(key,comment)) + dc + ) + |> Some | _ -> None - let (|FreeText|_|) (cells : FsCell list) = + let (|Freetext|_|) (cells : FsCell list) = let cellValues = cells |> List.map (fun c -> c.ValueAsString()) match cellValues with - | [text] -> - (CompositeHeader.FreeText text, CompositeCell.freeTextFromFsCells) - |> Some + | [key] -> + (fun (dc : DataContext) (cells : FsCell list) -> + let cellValues = cells |> List.map (fun c -> c.ValueAsString()) + let comment = cellValues.[0] + dc.Comments.Add(Comment.create(key,comment)) + dc + ) + |> Some | _ -> None open ActivePattern -let fromFsCells (cells : list) : CompositeHeader*(FsCell list -> CompositeCell) = +let fromFsCells (cells : FsCell list) : DataContext -> FsCell list -> DataContext = match cells with - | Data d -> d - | Explication e -> e - | Unit u -> u - | ObjectType ot -> ot - | Description d -> d - | GeneratedBy gb -> gb - | FreeText ft -> ft - | _ -> failwithf "Could not parse header group %O" cells - -let toFsCells (header : CompositeHeader) : list = - match header with - | CompositeHeader.Input IOType.Data -> - [ + | Explication r -> r + | Unit r -> r + | ObjectType r -> r + | Description r -> r + | GeneratedBy r -> r + | Data r -> r + | Comment r -> r + | Freetext r -> r + | _ -> failwithf "Could not parse data map column: %s" (cells |> List.map (fun c -> c.ValueAsString()) |> String.concat ", ") + +let toFsCells (commentKeys : string list) : list = + [ + yield! [ FsCell("Data") FsCell("Data Format") FsCell("Data Selector Format") - ] - | h when h = DataMapAux.explicationHeader -> - [ + ] + yield! [ FsCell(DataMapAux.explicationShortHand) FsCell("Term Source REF") FsCell("Term Accession Number") ] - | h when h = DataMapAux.unitHeader -> - [ + yield![ FsCell(DataMapAux.unitShortHand) FsCell("Term Source REF") FsCell("Term Accession Number") ] - | h when h = DataMapAux.objectTypeHeader -> - [ + yield![ FsCell(DataMapAux.objectTypeShortHand) FsCell("Term Source REF") FsCell("Term Accession Number") ] - | CompositeHeader.FreeText text -> - [FsCell(text)] - | _ -> failwithf "Could not parse DataMap header %O." header + yield! [ + FsCell(DataMapAux.descriptionShortHand) + ] + yield! [ + FsCell(DataMapAux.generatedByShortHand) + ] + for ck in commentKeys do + yield FsCell(ck) + ] diff --git a/src/Spreadsheet/DataMapTable/DataMapTable.fs b/src/Spreadsheet/DataMapTable/DataMapTable.fs index b188e514..65baa733 100644 --- a/src/Spreadsheet/DataMapTable/DataMapTable.fs +++ b/src/Spreadsheet/DataMapTable/DataMapTable.fs @@ -30,24 +30,24 @@ let tryDataMapTable (sheet : FsWorksheet) = |> Seq.tryFind (fun t -> t.Name.StartsWith datamapTablePrefix) /// Groups and parses a collection of single columns into the according ISA composite columns -let composeColumns (columns : seq) : CompositeColumn [] = +let composeColumns (columns : seq) : ResizeArray = + let l = (columns |> Seq.item 0).MaxRowIndex + let dc = ResizeArray([| for i = 0 to l - 1 do yield DataContext()|]) columns |> Seq.toList |> groupColumnsByHeader - |> List.map DataMapColumn.fromFsColumns - |> List.toArray - + |> List.iter (DataMapColumn.setFromFsColumns dc >> ignore) + dc /// Returns the protocol described by the headers and a function for parsing the values of the matrix to the processes of this protocol let tryFromFsWorksheet (sheet : FsWorksheet) = try match tryDataMapTable sheet with | Some (t: FsTable) -> - let compositeColumns = + let dataContexts = t.GetColumns(sheet.CellCollection) |> composeColumns - DataMap.init() - |> DataMap.addColumns(compositeColumns,skipFillMissing = true) + DataMap(dataContexts) |> Some | None -> None @@ -60,14 +60,11 @@ let toFsWorksheet (table : DataMap) = let ws = FsWorksheet("isa_datamap") // Cancel if there are no columns - if table.Table.Columns.Length = 0 then ws + if table.DataContexts.Count = 0 then ws else let columns = - table.Table.Columns - |> List.ofArray - |> List.sortBy classifyColumnOrder - |> List.collect DataMapColumn.toFsColumns + DataMapColumn.toFsColumns table.DataContexts let maxRow = columns.Head.Length let maxCol = columns.Length let fsTable = ws.Table("datamapTable",FsRangeAddress(FsAddress(1,1),FsAddress(maxRow,maxCol))) From ff4bad8421b6bb0c1103b152d69cc471b5259ca2 Mon Sep 17 00:00:00 2001 From: HLWeil Date: Fri, 7 Jun 2024 09:41:33 +0200 Subject: [PATCH 3/4] fix datamap spreadsheet parsing against tests --- src/Core/DataContext.fs | 3 + src/Core/Table/DataMap.fs | 30 ++++- tests/ARCtrl/ARCtrl.Tests.fs | 4 +- tests/Spreadsheet/DataMapTests.fs | 107 ++++++++---------- .../Spreadsheet.DataMap.fs | 9 +- 5 files changed, 81 insertions(+), 72 deletions(-) diff --git a/src/Core/DataContext.fs b/src/Core/DataContext.fs index f5b20279..58cb3fdc 100644 --- a/src/Core/DataContext.fs +++ b/src/Core/DataContext.fs @@ -35,6 +35,9 @@ type DataContext(?id,?name : string,?dataType,?format,?selectorFormat, ?explicat and set(generatedBy) = _generatedBy <- generatedBy + member this.AsData() = + Data(?id = this.ID,?name = this.Name, ?dataType = this.DataType, ?format = this.Format, ?selectorFormat = this.SelectorFormat, comments = this.Comments) + member this.Copy() = let copy = new DataContext() copy.ID <- this.ID diff --git a/src/Core/Table/DataMap.fs b/src/Core/Table/DataMap.fs index 9dd417c2..24e8b508 100644 --- a/src/Core/Table/DataMap.fs +++ b/src/Core/Table/DataMap.fs @@ -10,23 +10,47 @@ open Fable.Core module DataMapAux = [] - let dataShortHand = "Data" - + let dataMapName = "DataMap" + + let dataHeader = CompositeHeader.Input IOType.Data + + [] + let dataShortHand = "Data" + + let explication = OntologyAnnotation("Clarification","NCIT","http://purl.obolibrary.org/obo/NCIT_C94778") + [] let explicationShortHand = "Explication" + let explicationHeader = CompositeHeader.Parameter explication + + let unit = OntologyAnnotation("Unit","UO","http://purl.obolibrary.org/obo/UO_0000000") + [] let unitShortHand = "Unit" + let unitHeader = CompositeHeader.Parameter unit + + let objectType = OntologyAnnotation("Data Type","NCIT","http://purl.obolibrary.org/obo/NCIT_C42645") + + let objectTypeHeader = CompositeHeader.Parameter objectType + [] let objectTypeShortHand = "Object Type" + let descriptionHeader = CompositeHeader.FreeText "Description" + [] let descriptionShortHand = "Description" + let generatedByHeader = CompositeHeader.FreeText "Generated By" + [] let generatedByShortHand = "Generated By" + let allowedHeaders = + [dataHeader; explicationHeader; unitHeader; objectTypeHeader; descriptionHeader; generatedByHeader] + let getOntologyColumn (f : DataContext -> OntologyAnnotation option) (dataContexts : ResizeArray) = dataContexts |> Seq.map (fun dc -> @@ -80,6 +104,8 @@ type DataMap(dataContexts : ResizeArray) = member this.StaticHash with get() = staticHash and set(value) = staticHash <- value + member this.DataContexts with get() = dataContexts + static member init() = DataMap(ResizeArray()) diff --git a/tests/ARCtrl/ARCtrl.Tests.fs b/tests/ARCtrl/ARCtrl.Tests.fs index 15f56764..8716ee44 100644 --- a/tests/ARCtrl/ARCtrl.Tests.fs +++ b/tests/ARCtrl/ARCtrl.Tests.fs @@ -161,7 +161,7 @@ let private tests_read_contracts = testList "read_contracts" [ let a1 = inv.GetAssay(SimpleISA.Assay.proteomeIdentifer) let datamap = Expect.wantSome a1.DataMap "Proteome Assay was supposed to have datamap" - Expect.equal 2 datamap.Table.RowCount "Datamap was not read correctly" + Expect.equal 2 datamap.DataContexts.Count "Datamap was not read correctly" let a2 = inv.GetAssay(SimpleISA.Assay.metabolomeIdentifer) Expect.isNone a2.DataMap "Metabolome Assay was not supposed to have datamap" @@ -376,7 +376,7 @@ let private tests_updateContracts = testList "update_contracts" [ let isa = arc.ISA.Value let dm = Expect.wantSome (isa.GetAssay(SimpleISA.Assay.proteomeIdentifer).DataMap) "Assay should have datamap" - dm.Values.[(0,1)] <- CompositeCell.createDataFromString("Hello") + dm.GetDataContext(1).Name <- Some "Hello" let contracts = arc.GetUpdateContracts() Expect.equal contracts.Length 1 $"Should contain only assay datamap change contract" diff --git a/tests/Spreadsheet/DataMapTests.fs b/tests/Spreadsheet/DataMapTests.fs index 6d43065f..68c884c7 100644 --- a/tests/Spreadsheet/DataMapTests.fs +++ b/tests/Spreadsheet/DataMapTests.fs @@ -29,30 +29,28 @@ let private simpleTable = Expect.isSome table "Table was not created" let table = table.Value - Expect.equal table.Table.ColumnCount 6 "Wrong number of columns" - Expect.equal table.Table.RowCount 1 "Wrong number of rows" - - let expectedHeaders = - [ - DataMapAux.dataHeader - DataMapAux.explicationHeader - DataMapAux.unitHeader - DataMapAux.objectTypeHeader - DataMapAux.descriptionHeader - DataMapAux.generatedByHeader - ] - Expect.sequenceEqual table.Headers expectedHeaders "Headers did not match" - - let expectedCells = - [ - Data.dataValue - Explication.meanValue - Unit.ppmValue - ObjectType.floatValue - Description.descriptionValue - GeneratedBy.generatedByValue - ] - Expect.sequenceEqual (table.GetRow(0)) expectedCells "Cells did not match" + Expect.equal table.DataContexts.Count 1 "Wrong number of rows" + + let dc = table.GetDataContext(0) + + Expect.equal (dc.AsData()) Data.dataValue "Data did not match" + + let explication = Expect.wantSome dc.Explication "Explication was not set" + Expect.equal explication Explication.meanValue "Explication did not match" + + let unit = Expect.wantSome dc.Unit "Unit was not set" + Expect.equal unit Unit.ppmValue "Unit did not match" + + let objectType = Expect.wantSome dc.ObjectType "ObjectType was not set" + Expect.equal objectType ObjectType.floatValue "ObjectType did not match" + + let description = Expect.wantSome dc.Description "Description was not set" + Expect.equal description Description.descriptionValue "Description did not match" + + let generatedBy = Expect.wantSome dc.GeneratedBy "GeneratedBy was not set" + Expect.equal generatedBy GeneratedBy.generatedByValue "GeneratedBy did not match" + + Expect.isEmpty dc.Comments "Comments should be empty" ) testCase "Write" (fun () -> @@ -84,19 +82,8 @@ let private valuelessTable = Expect.isSome table "Table was not created" let table = table.Value - Expect.equal table.Table.ColumnCount 6 "Wrong number of columns" - Expect.equal table.Table.RowCount 0 "Wrong number of rows" - - let expectedHeaders = - [ - DataMapAux.dataHeader - DataMapAux.explicationHeader - DataMapAux.unitHeader - DataMapAux.objectTypeHeader - DataMapAux.descriptionHeader - DataMapAux.generatedByHeader - ] - Expect.sequenceEqual table.Headers expectedHeaders "Headers did not match" + Expect.equal table.DataContexts.Count 0 "Wrong number of rows" + ) // TODO: What should we do with units of empty columns? //testCase "Write" (fun () -> @@ -143,30 +130,28 @@ let private simpleFile = let table = DataMap.fromFsWorkbook wb - Expect.equal table.Table.ColumnCount 6 "Wrong number of columns" - Expect.equal table.Table.RowCount 1 "Wrong number of rows" - - let expectedHeaders = - [ - DataMapAux.dataHeader - DataMapAux.explicationHeader - DataMapAux.unitHeader - DataMapAux.objectTypeHeader - DataMapAux.descriptionHeader - DataMapAux.generatedByHeader - ] - Expect.sequenceEqual table.Headers expectedHeaders "Headers did not match" - - let expectedCells = - [ - Data.dataValue - Explication.meanValue - Unit.ppmValue - ObjectType.floatValue - Description.descriptionValue - GeneratedBy.generatedByValue - ] - Expect.sequenceEqual (table.GetRow(0)) expectedCells "Cells did not match" + Expect.equal table.DataContexts.Count 1 "Wrong number of rows" + + let dc = table.GetDataContext(0) + + Expect.equal (dc.AsData()) Data.dataValue "Data did not match" + + let explication = Expect.wantSome dc.Explication "Explication was not set" + Expect.equal explication Explication.meanValue "Explication did not match" + + let unit = Expect.wantSome dc.Unit "Unit was not set" + Expect.equal unit Unit.ppmValue "Unit did not match" + + let objectType = Expect.wantSome dc.ObjectType "ObjectType was not set" + Expect.equal objectType ObjectType.floatValue "ObjectType did not match" + + let description = Expect.wantSome dc.Description "Description was not set" + Expect.equal description Description.descriptionValue "Description did not match" + + let generatedBy = Expect.wantSome dc.GeneratedBy "GeneratedBy was not set" + Expect.equal generatedBy GeneratedBy.generatedByValue "GeneratedBy did not match" + + Expect.isEmpty dc.Comments "Comments should be empty" ) testCase "Write" (fun () -> diff --git a/tests/TestingUtils/TestObjects.Spreadsheet/Spreadsheet.DataMap.fs b/tests/TestingUtils/TestObjects.Spreadsheet/Spreadsheet.DataMap.fs index adc5777c..42fd172f 100644 --- a/tests/TestingUtils/TestObjects.Spreadsheet/Spreadsheet.DataMap.fs +++ b/tests/TestingUtils/TestObjects.Spreadsheet/Spreadsheet.DataMap.fs @@ -16,7 +16,6 @@ module Data = let dataValue = Data.create(Name = "MyDataFile.csv#col=1",Format = "text/csv", SelectorFormat = "https://datatracker.ietf.org/doc/html/rfc7111") - |> CompositeCell.Data let dataHeaderV1 = "Data" let dataHeaderV2 = "Data Format" @@ -40,7 +39,6 @@ module Explication = let pValueValue = OntologyAnnotation("p-value","NCIT","http://purl.obolibrary.org/obo/NCIT_C44185") - |> CompositeCell.Term let explicationHeaderV1 = "Explication" let explicationHeaderV2 = "Term Source REF" @@ -63,7 +61,6 @@ module Explication = // Arithmetic Mean, http://purl.obolibrary.org/obo/NCIT_C53319 let meanValue = OntologyAnnotation("Arithmetic Mean","NCIT","http://purl.obolibrary.org/obo/NCIT_C53319") - |> CompositeCell.Term let meanValueV1 = "Arithmetic Mean" let meanValueV2 = "NCIT" @@ -83,7 +80,6 @@ module Unit = let ppmValue = OntologyAnnotation("parts per million","UO","http://purl.obolibrary.org/obo/UO_0000169") - |> CompositeCell.Term let unitHeaderV1 = "Unit" let unitHeaderV2 = "Term Source REF" @@ -107,7 +103,6 @@ module ObjectType = let floatValue = OntologyAnnotation("float","NCIT","http://purl.obolibrary.org/obo/NCIT_C42645") - |> CompositeCell.Term let objectTypeHeaderV1 = "Object Type" let objectTypeHeaderV2 = "Term Source REF" @@ -129,7 +124,7 @@ module ObjectType = module Description = - let descriptionValue = CompositeCell.FreeText "This is a description" + let descriptionValue = "This is a description" let descriptionHeaderV1 = "Description" @@ -143,7 +138,7 @@ module Description = module GeneratedBy = - let generatedByValue = CompositeCell.FreeText "MyTool.exe" + let generatedByValue = "MyTool.exe" let generatedByHeaderV1 = "Generated By" From 0ead1eb2ca62057fb9e7c2eb585df94ad50ce56f Mon Sep 17 00:00:00 2001 From: HLWeil Date: Fri, 7 Jun 2024 09:42:13 +0200 Subject: [PATCH 4/4] fix datamap spreadsheet parser against tests --- src/Spreadsheet/DataMapTable/DataMapColumn.fs | 13 +++++++------ src/Spreadsheet/DataMapTable/DataMapHeader.fs | 17 +++++++++-------- src/Spreadsheet/DataMapTable/DataMapTable.fs | 3 ++- 3 files changed, 18 insertions(+), 15 deletions(-) diff --git a/src/Spreadsheet/DataMapTable/DataMapColumn.fs b/src/Spreadsheet/DataMapTable/DataMapColumn.fs index 3c38eae1..89986a43 100644 --- a/src/Spreadsheet/DataMapTable/DataMapColumn.fs +++ b/src/Spreadsheet/DataMapTable/DataMapColumn.fs @@ -10,9 +10,9 @@ let setFromFsColumns (dc : ResizeArray) (columns : list) columns |> List.map (fun c -> c.[1]) |> DataMapHeader.fromFsCells - for i = 2 to dc.Count - 1 do + for i = 0 to dc.Count - 1 do columns - |> List.map (fun c -> c.[i]) + |> List.map (fun c -> c.[i+2]) |> cellParser (dc.[i]) |> ignore dc @@ -45,9 +45,9 @@ let toFsColumns (dc : ResizeArray) : FsCell list list = ] let createData (dc : DataContext) = [ - FsCell(dc.Name) - FsCell(dc.Format) - FsCell(dc.SelectorFormat) + FsCell(dc.Name |> Option.defaultValue "") + FsCell(dc.Format |> Option.defaultValue "") + FsCell(dc.SelectorFormat |> Option.defaultValue "") ] let createRow (dc : DataContext) = [ @@ -73,4 +73,5 @@ let toFsColumns (dc : ResizeArray) : FsCell list list = headers for dc in dc do createRow dc - ] \ No newline at end of file + ] + |> List.transpose \ No newline at end of file diff --git a/src/Spreadsheet/DataMapTable/DataMapHeader.fs b/src/Spreadsheet/DataMapTable/DataMapHeader.fs index 896cf41f..39bcbc7b 100644 --- a/src/Spreadsheet/DataMapTable/DataMapHeader.fs +++ b/src/Spreadsheet/DataMapTable/DataMapHeader.fs @@ -144,14 +144,15 @@ open ActivePattern let fromFsCells (cells : FsCell list) : DataContext -> FsCell list -> DataContext = match cells with - | Explication r -> r - | Unit r -> r - | ObjectType r -> r - | Description r -> r - | GeneratedBy r -> r - | Data r -> r - | Comment r -> r - | Freetext r -> r + | Explication r + | Unit r + | ObjectType r + | Description r + | GeneratedBy r + | Data r + | Comment r + | Freetext r -> + fun (dc : DataContext) (cells : FsCell list) -> r dc cells | _ -> failwithf "Could not parse data map column: %s" (cells |> List.map (fun c -> c.ValueAsString()) |> String.concat ", ") let toFsCells (commentKeys : string list) : list = diff --git a/src/Spreadsheet/DataMapTable/DataMapTable.fs b/src/Spreadsheet/DataMapTable/DataMapTable.fs index 65baa733..94cd8afd 100644 --- a/src/Spreadsheet/DataMapTable/DataMapTable.fs +++ b/src/Spreadsheet/DataMapTable/DataMapTable.fs @@ -31,7 +31,7 @@ let tryDataMapTable (sheet : FsWorksheet) = /// Groups and parses a collection of single columns into the according ISA composite columns let composeColumns (columns : seq) : ResizeArray = - let l = (columns |> Seq.item 0).MaxRowIndex + let l = (columns |> Seq.item 0).MaxRowIndex - 1 let dc = ResizeArray([| for i = 0 to l - 1 do yield DataContext()|]) columns |> Seq.toList @@ -88,4 +88,5 @@ let toFsWorksheet (table : DataMap) = fsTable.Cell(address, ws.CellCollection).SetValueAs value ) ) + ws.RescanRows() ws \ No newline at end of file