diff --git a/src/ARCTokenization/Terms.fs b/src/ARCTokenization/Terms.fs index f96688c..abc9f2a 100644 --- a/src/ARCTokenization/Terms.fs +++ b/src/ARCTokenization/Terms.fs @@ -22,6 +22,11 @@ module InvestigationMetadata = let ontology = OboOntology.fromLines true obo + let obsoleteCvTerms = + ontology.Terms + |> List.filter (fun t -> t.IsObsolete) + |> List.map (fun t -> CvTerm.create(accession = t.Id, name = t.Name, ref = "INVMSO")) + let cvTerms = ontology.Terms |> List.map (fun t -> CvTerm.create(accession = t.Id, name = t.Name, ref = "INVMSO")) @@ -32,9 +37,14 @@ module StudyMetadata = let ontology = OboOntology.fromLines true obo + let obsoleteCvTerms = + ontology.Terms + |> List.filter (fun t -> t.IsObsolete) + |> List.map (fun t -> CvTerm.create(accession = t.Id, name = t.Name, ref = "STDMSO")) + let cvTerms = ontology.Terms - |> List.map (fun t -> CvTerm.create(accession = t.Id, name = t.Name, ref ="STDMSO")) + |> List.map (fun t -> CvTerm.create(accession = t.Id, name = t.Name, ref = "STDMSO")) module AssayMetadata = @@ -44,7 +54,13 @@ module AssayMetadata = let cvTerms = ontology.Terms - |> List.map (fun t -> CvTerm.create(accession = t.Id, name = t.Name, ref ="ASSMSO")) + |> List.map (fun t -> CvTerm.create(accession = t.Id, name = t.Name, ref = "ASSMSO")) + + let obsoleteCvTerms = + ontology.Terms + |> List.filter (fun t -> t.IsObsolete) + |> List.map (fun t -> CvTerm.create(accession = t.Id, name = t.Name, ref = "ASSMSO")) + module StructuralTerms = diff --git a/src/ARCTokenization/structural_ontologies/investigation_metadata_structural_ontology.obo b/src/ARCTokenization/structural_ontologies/investigation_metadata_structural_ontology.obo index 59098e3..4bd43e9 100644 --- a/src/ARCTokenization/structural_ontologies/investigation_metadata_structural_ontology.obo +++ b/src/ARCTokenization/structural_ontologies/investigation_metadata_structural_ontology.obo @@ -225,6 +225,21 @@ def: "" relationship: part_of INVMSO:00000021 ! INVESTIGATION CONTACTS relationship: follows INVMSO:00000031 ! Investigation Person Roles Term Accession Number +[Term] +id: INVMSO:00000093 +name: Comment[] +def: "" +synonym: "Comment[Investigation Person ORCID]" EXACT [] +is_obsolete: true +relationship: part_of INVMSO:00000021 ! INVESTIGATION CONTACTS + +[Term] +id: INVMSO:00000094 +name: Comment[Investigation Person ORCID] +def: "" +synonym: "Comment[]" EXACT [] +relationship: part_of INVMSO:00000021 ! INVESTIGATION CONTACTS + [Term] name: STUDY id: INVMSO:00000033 diff --git a/src/ARCTokenization/structural_ontologies/investigation_metadata_structural_ontology.yml b/src/ARCTokenization/structural_ontologies/investigation_metadata_structural_ontology.yml index e12415e..04982f9 100644 --- a/src/ARCTokenization/structural_ontologies/investigation_metadata_structural_ontology.yml +++ b/src/ARCTokenization/structural_ontologies/investigation_metadata_structural_ontology.yml @@ -225,6 +225,21 @@ def: "" relationship: part_of INVMSO:00000021 ! INVESTIGATION CONTACTS relationship: follows INVMSO:00000031 ! Investigation Person Roles Term Accession Number + [Term] + id: INVMSO:00000093 + name: Comment[] + def: "" + synonym: "Comment[Investigation Person ORCID]" EXACT [] + is_obsolete: true + relationship: part_of INVMSO:00000021 ! INVESTIGATION CONTACTS + + [Term] + id: INVMSO:00000094 + name: Comment[Investigation Person ORCID] + def: "" + synonym: "Comment[]" EXACT [] + relationship: part_of INVMSO:00000021 ! INVESTIGATION CONTACTS + [Term] name: STUDY id: INVMSO:00000033 diff --git a/src/ARCTokenization/structural_ontologies/study_metadata_structural_ontology.obo b/src/ARCTokenization/structural_ontologies/study_metadata_structural_ontology.obo index 379a491..2c67a96 100644 --- a/src/ARCTokenization/structural_ontologies/study_metadata_structural_ontology.obo +++ b/src/ARCTokenization/structural_ontologies/study_metadata_structural_ontology.obo @@ -18,7 +18,7 @@ synonym: "STUDY METADATA" EXACT [] relationship: part_of STDMSO:00000001 ! Study Metadata [Term] -id: STDMSO:00000051 +id: STDMSO:00000062 name: STUDY METADATA def: "" synonym: "STUDY" EXACT [] diff --git a/src/ARCTokenization/structural_ontologies/study_metadata_structural_ontology.yml b/src/ARCTokenization/structural_ontologies/study_metadata_structural_ontology.yml index 9789a69..f0efde5 100644 --- a/src/ARCTokenization/structural_ontologies/study_metadata_structural_ontology.yml +++ b/src/ARCTokenization/structural_ontologies/study_metadata_structural_ontology.yml @@ -18,7 +18,7 @@ def: "" relationship: part_of STDMSO:00000001 ! Study Metadata [Term] - id: STDMSO:00000051 + id: STDMSO:00000062 name: STUDY METADATA def: "" synonym: "STUDY" EXACT [] diff --git a/tests/ARCTokenization.Tests/ARCTokenization.Tests.fsproj b/tests/ARCTokenization.Tests/ARCTokenization.Tests.fsproj index 211ebe6..fc8846f 100644 --- a/tests/ARCTokenization.Tests/ARCTokenization.Tests.fsproj +++ b/tests/ARCTokenization.Tests/ARCTokenization.Tests.fsproj @@ -10,11 +10,12 @@ + + - diff --git a/tests/ARCTokenization.Tests/IntegrationTests/AssayAnnotationTable.fs b/tests/ARCTokenization.Tests/IntegrationTests/AssayAnnotationTable.fs index e2eb019..0bcaa8e 100644 --- a/tests/ARCTokenization.Tests/IntegrationTests/AssayAnnotationTable.fs +++ b/tests/ARCTokenization.Tests/IntegrationTests/AssayAnnotationTable.fs @@ -1,4 +1,4 @@ -module IntegrationTests.Assay +namespace IntegrationTests open ControlledVocabulary open FsSpreadsheet @@ -9,121 +9,122 @@ open Xunit open TestUtils open TestObjects -module Correct = +module Assay = + module Correct = - module ``Assay with only source and sample column`` = + module ``Assay with only source and sample column`` = - let assay = Assays.Correct.``assay with only source and sample column`` + let assay = Assays.Correct.``assay with only source and sample column`` - [] - let ``AnnotationTable count`` () = - Assert.Equal(assay.Length, 1) + [] + let ``AnnotationTable count`` () = + Assert.Equal(assay.Length, 1) - let table = assay.[0] |> snd + let table = assay.[0] |> snd - [] - let ``IOColumns count`` () = - TokenizedAnnotationTable.hasIOColumnAmount 2 table + [] + let ``IOColumns count`` () = + TokenizedAnnotationTable.hasIOColumnAmount 2 table - [] - let ``TermRelatedBuildingBlocks count`` () = - TokenizedAnnotationTable.hasTermRelatedBuildingBlockAmount 0 table + [] + let ``TermRelatedBuildingBlocks count`` () = + TokenizedAnnotationTable.hasTermRelatedBuildingBlockAmount 0 table - let expectedIOColumns = - [ + let expectedIOColumns = [ - CvParam( - id = "(n/a)", - name = "Source Name", - ref = "(n/a)", - pv = (ParamValue.Value "Source A"), - attributes = [] - ) + [ + CvParam( + id = "(n/a)", + name = "Source Name", + ref = "(n/a)", + pv = (ParamValue.Value "Source A"), + attributes = [] + ) + ] + [ + CvParam( + id = "(n/a)", + name = "Sample Name", + ref = "(n/a)", + pv = (ParamValue.Value "Sample A"), + attributes = [] + ) + ] ] - [ - CvParam( - id = "(n/a)", - name = "Sample Name", - ref = "(n/a)", - pv = (ParamValue.Value "Sample A"), - attributes = [] - ) - ] - ] - let expectedTermRelatedBuildingBlocks: CvParam list list = [] + let expectedTermRelatedBuildingBlocks: CvParam list list = [] - [] - let ``IOColumns CvParams`` () = - table - |> TokenizedAnnotationTable.IOColumnsEqual expectedIOColumns + [] + let ``IOColumns CvParams`` () = + table + |> TokenizedAnnotationTable.IOColumnsEqual expectedIOColumns - [] - let ``TermRelatedBuildingBlocks CvParams`` () = - table - |> TokenizedAnnotationTable.termRelatedBuildingBlocksEqual expectedTermRelatedBuildingBlocks + [] + let ``TermRelatedBuildingBlocks CvParams`` () = + table + |> TokenizedAnnotationTable.termRelatedBuildingBlocksEqual expectedTermRelatedBuildingBlocks - module ``Assay with single characteristics`` = + module ``Assay with single characteristics`` = - let assay = Assays.Correct.``assay with single characteristics`` + let assay = Assays.Correct.``assay with single characteristics`` - [] - let ``AnnotationTable count`` () = - Assert.Equal(assay.Length, 1) + [] + let ``AnnotationTable count`` () = + Assert.Equal(assay.Length, 1) - let table = assay.[0] |> snd + let table = assay.[0] |> snd - [] - let ``IOColumns count`` () = - TokenizedAnnotationTable.hasIOColumnAmount 2 table + [] + let ``IOColumns count`` () = + TokenizedAnnotationTable.hasIOColumnAmount 2 table - [] - let ``TermRelatedBuildingBlocks count`` () = - TokenizedAnnotationTable.hasTermRelatedBuildingBlockAmount 1 table + [] + let ``TermRelatedBuildingBlocks count`` () = + TokenizedAnnotationTable.hasTermRelatedBuildingBlockAmount 1 table - let expectedIOColumns = - [ - [ - CvParam( - id = "(n/a)", - name = "Source Name", - ref = "(n/a)", - pv = (ParamValue.Value "Source A"), - attributes = [] - ) - ] + let expectedIOColumns = [ - CvParam( - id = "(n/a)", - name = "Sample Name", - ref = "(n/a)", - pv = (ParamValue.Value "Sample A"), - attributes = [] - ) + [ + CvParam( + id = "(n/a)", + name = "Source Name", + ref = "(n/a)", + pv = (ParamValue.Value "Source A"), + attributes = [] + ) + ] + [ + CvParam( + id = "(n/a)", + name = "Sample Name", + ref = "(n/a)", + pv = (ParamValue.Value "Sample A"), + attributes = [] + ) + ] ] - ] - let expectedTermRelatedBuildingBlocks = - [ + let expectedTermRelatedBuildingBlocks = [ - CvParam( - id = "Term Accession Number (OBI:0100026)", - name = "Characteristic [organism]", - ref = "Term Source REF (OBI:0100026)", - pv = ParamValue.CvValue (CvTerm.create(accession = "http://purl.obolibrary.org/obo/NCBITaxon_3702", name = "Arabidopsis thaliana", ref = "NCBITaxon")), - attributes = [] - ) + [ + CvParam( + id = "Term Accession Number (OBI:0100026)", + name = "Characteristic [organism]", + ref = "Term Source REF (OBI:0100026)", + pv = (ParamValue.CvValue (CvTerm.create(accession = "http://purl.obolibrary.org/obo/NCBITaxon_3702", name = "Arabidopsis thaliana", ref = "NCBITaxon"))), + attributes = [] + ) + ] ] - ] - [] - let ``IOColumns CvParams`` () = - table - |> TokenizedAnnotationTable.IOColumnsEqual expectedIOColumns + [] + let ``IOColumns CvParams`` () = + table + |> TokenizedAnnotationTable.IOColumnsEqual expectedIOColumns - [] - let ``TermRelatedBuildingBlocks CvParams`` () = - table - |> TokenizedAnnotationTable.termRelatedBuildingBlocksEqual expectedTermRelatedBuildingBlocks \ No newline at end of file + [] + let ``TermRelatedBuildingBlocks CvParams`` () = + table + |> TokenizedAnnotationTable.termRelatedBuildingBlocksEqual expectedTermRelatedBuildingBlocks \ No newline at end of file diff --git a/tests/ARCTokenization.Tests/IntegrationTests/AssayMetadata.fs b/tests/ARCTokenization.Tests/IntegrationTests/AssayMetadata.fs index ca05ac7..6569c36 100644 --- a/tests/ARCTokenization.Tests/IntegrationTests/AssayMetadata.fs +++ b/tests/ARCTokenization.Tests/IntegrationTests/AssayMetadata.fs @@ -1,17 +1,16 @@ -module IntegrationTests.AssayMetadata +namespace IntegrationTests -open ControlledVocabulary -open FsSpreadsheet -open FsSpreadsheet.ExcelIO -open ARCTokenization -open Xunit +module AssayMetadata = -open TestUtils + open ControlledVocabulary + open FsSpreadsheet + open FsSpreadsheet.ExcelIO + open ARCTokenization + open Xunit + open TestUtils - -let allExpectedMetadataTerms = - - Terms.AssayMetadata.cvTerms - |> List.skip 1 //(ignore root term) - |> List.map (fun p -> CvParam(p, ParamValue.Value "", [])) \ No newline at end of file + let allExpectedMetadataTermsEmpty = + Terms.AssayMetadata.cvTerms + |> List.skip 1 //(ignore root term) + |> List.map (fun p -> CvParam(p, ParamValue.CvValue (CvTerm.create(accession = "AGMO:00000001", name = "Metadata Section Key", ref = "AGMO")), [])) \ No newline at end of file diff --git a/tests/ARCTokenization.Tests/IntegrationTests/InvestigationMetadata.fs b/tests/ARCTokenization.Tests/IntegrationTests/InvestigationMetadata.fs index a5a14b7..386d7a8 100644 --- a/tests/ARCTokenization.Tests/IntegrationTests/InvestigationMetadata.fs +++ b/tests/ARCTokenization.Tests/IntegrationTests/InvestigationMetadata.fs @@ -1,240 +1,152 @@ -module IntegrationTests.InvestigationMetadata +namespace IntegrationTests -open ControlledVocabulary -open FsSpreadsheet -open FsSpreadsheet.ExcelIO -open ARCTokenization -open Xunit +module InvestigationMetadata = -open TestUtils + open ControlledVocabulary + open FsSpreadsheet + open FsSpreadsheet.ExcelIO + open ARCTokenization + open Xunit -let parsedInvestigationMetadataEmpty = Investigation.parseMetadataSheetFromFile "Fixtures/incorrect/investigation_empty.xlsx" -let parsedInvestigationMetadataSimple = Investigation.parseMetadataSheetFromFile "Fixtures/correct/investigation_simple.xlsx" + open TestUtils -let allExpectedMetadataTermsEmpty = - // maybe we want to not rely on parsed obo? i think we can. - //[ - // CvParam(("INVMSO:00000002","ONTOLOGY SOURCE REFERENCE", "INVMSO"), ParamValue.Value "", []) - // CvParam(("INVMSO:00000003","Term Source Name", "INVMSO"), ParamValue.Value "", []) - // CvParam(("INVMSO:00000004","Term Source File", "INVMSO"), ParamValue.Value "", []) - // CvParam(("INVMSO:00000005","Term Source Version", "INVMSO"), ParamValue.Value "", []) - // CvParam(("INVMSO:00000006","Term Source Description", "INVMSO"), ParamValue.Value "", []) - // CvParam(("INVMSO:00000007","INVESTIGATION", "INVMSO"), ParamValue.Value "", []) - // CvParam(("INVMSO:00000008","Investigation Identifier", "INVMSO"), ParamValue.Value "", []) - // CvParam(("INVMSO:00000009","Investigation Title", "INVMSO"), ParamValue.Value "", []) - // CvParam(("INVMSO:00000010","Investigation Description", "INVMSO"), ParamValue.Value "", []) - // CvParam(("INVMSO:00000011","Investigation Submission Date", "INVMSO"), ParamValue.Value "", []) - // CvParam(("INVMSO:00000012","Investigation Public Release Date", "INVMSO"), ParamValue.Value "", []) - // CvParam(("INVMSO:00000013","INVESTIGATION PUBLICATIONS", "INVMSO"), ParamValue.Value "", []) - // CvParam(("INVMSO:00000014","Investigation Publication PubMed ID", "INVMSO"), ParamValue.Value "", []) - // CvParam(("INVMSO:00000015","Investigation Publication DOI", "INVMSO"), ParamValue.Value "", []) - // CvParam(("INVMSO:00000016","Investigation Publication Author List", "INVMSO"), ParamValue.Value "", []) - // CvParam(("INVMSO:00000017","Investigation Publication Title", "INVMSO"), ParamValue.Value "", []) - // CvParam(("INVMSO:00000018","Investigation Publication Status", "INVMSO"), ParamValue.Value "", []) - // CvParam(("INVMSO:00000019","Investigation Publication Status Term Accession Number", "INVMSO"), ParamValue.Value "", []) - // CvParam(("INVMSO:00000020","Investigation Publication Status Term Source REF", "INVMSO"), ParamValue.Value "", []) - // CvParam(("INVMSO:00000021","INVESTIGATION CONTACTS", "INVMSO"), ParamValue.Value "", []) - // CvParam(("INVMSO:00000022","Investigation Person Last Name", "INVMSO"), ParamValue.Value "", []) - // CvParam(("INVMSO:00000023","Investigation Person First Name", "INVMSO"), ParamValue.Value "", []) - // CvParam(("INVMSO:00000024","Investigation Person Mid Initials", "INVMSO"), ParamValue.Value "", []) - // CvParam(("INVMSO:00000025","Investigation Person Email", "INVMSO"), ParamValue.Value "", []) - // CvParam(("INVMSO:00000026","Investigation Person Phone", "INVMSO"), ParamValue.Value "", []) - // CvParam(("INVMSO:00000027","Investigation Person Fax", "INVMSO"), ParamValue.Value "", []) - // CvParam(("INVMSO:00000028","Investigation Person Address", "INVMSO"), ParamValue.Value "", []) - // CvParam(("INVMSO:00000029","Investigation Person Affiliation", "INVMSO"), ParamValue.Value "", []) - // CvParam(("INVMSO:00000030","Investigation Person Roles", "INVMSO"), ParamValue.Value "", []) - // CvParam(("INVMSO:00000031","Investigation Person Roles Term Accession Number", "INVMSO"), ParamValue.Value "", []) - // CvParam(("INVMSO:00000032","Investigation Person Roles Term Source REF", "INVMSO"), ParamValue.Value "", []) - // CvParam(("INVMSO:00000033","STUDY", "INVMSO"), ParamValue.Value "", []) - // CvParam(("INVMSO:00000034","Study Identifier", "INVMSO"), ParamValue.Value "", []) - // CvParam(("INVMSO:00000035","Study Title", "INVMSO"), ParamValue.Value "", []) - // CvParam(("INVMSO:00000036","Study Description", "INVMSO"), ParamValue.Value "", []) - // CvParam(("INVMSO:00000037","Study Submission Date", "INVMSO"), ParamValue.Value "", []) - // CvParam(("INVMSO:00000038","Study Public Release Date", "INVMSO"), ParamValue.Value "", []) - // CvParam(("INVMSO:00000039","Study File Name", "INVMSO"), ParamValue.Value "", []) - // CvParam(("INVMSO:00000040","STUDY DESIGN DESCRIPTORS", "INVMSO"), ParamValue.Value "", []) - // CvParam(("INVMSO:00000041","Study Design Type", "INVMSO"), ParamValue.Value "", []) - // CvParam(("INVMSO:00000042","Study Design Type Term Accession Number", "INVMSO"), ParamValue.Value "", []) - // CvParam(("INVMSO:00000043","Study Design Type Term Source REF", "INVMSO"), ParamValue.Value "", []) - // CvParam(("INVMSO:00000044","STUDY PUBLICATIONS", "INVMSO"), ParamValue.Value "", []) - // CvParam(("INVMSO:00000045","Study Publication PubMed ID", "INVMSO"), ParamValue.Value "", []) - // CvParam(("INVMSO:00000046","Study Publication DOI", "INVMSO"), ParamValue.Value "", []) - // CvParam(("INVMSO:00000047","Study Publication Author List", "INVMSO"), ParamValue.Value "", []) - // CvParam(("INVMSO:00000048","Study Publication Title", "INVMSO"), ParamValue.Value "", []) - // CvParam(("INVMSO:00000049","Study Publication Status", "INVMSO"), ParamValue.Value "", []) - // CvParam(("INVMSO:00000050","Study Publication Status Term Accession Number", "INVMSO"), ParamValue.Value "", []) - // CvParam(("INVMSO:00000051","Study Publication Status Term Source REF", "INVMSO"), ParamValue.Value "", []) - // CvParam(("INVMSO:00000052","STUDY FACTORS", "INVMSO"), ParamValue.Value "", []) - // CvParam(("INVMSO:00000053","Study Factor Name", "INVMSO"), ParamValue.Value "", []) - // CvParam(("INVMSO:00000054","Study Factor Type", "INVMSO"), ParamValue.Value "", []) - // CvParam(("INVMSO:00000055","Study Factor Type Term Accession Number", "INVMSO"), ParamValue.Value "", []) - // CvParam(("INVMSO:00000056","Study Factor Type Term Source REF", "INVMSO"), ParamValue.Value "", []) - // CvParam(("INVMSO:00000057","STUDY ASSAYS", "INVMSO"), ParamValue.Value "", []) - // CvParam(("INVMSO:00000058","Study Assay Measurement Type", "INVMSO"), ParamValue.Value "", []) - // CvParam(("INVMSO:00000059","Study Assay Measurement Type Term Accession Number", "INVMSO"), ParamValue.Value "", []) - // CvParam(("INVMSO:00000060","Study Assay Measurement Type Term Source REF", "INVMSO"), ParamValue.Value "", []) - // CvParam(("INVMSO:00000061","Study Assay Technology Type", "INVMSO"), ParamValue.Value "", []) - // CvParam(("INVMSO:00000062","Study Assay Technology Type Term Accession Number", "INVMSO"), ParamValue.Value "", []) - // CvParam(("INVMSO:00000063","Study Assay Technology Type Term Source REF", "INVMSO"), ParamValue.Value "", []) - // CvParam(("INVMSO:00000064","Study Assay Technology Platform", "INVMSO"), ParamValue.Value "", []) - // CvParam(("INVMSO:00000065","Study Assay File Name", "INVMSO"), ParamValue.Value "", []) - // CvParam(("INVMSO:00000066","STUDY PROTOCOLS", "INVMSO"), ParamValue.Value "", []) - // CvParam(("INVMSO:00000067","Study Protocol Name", "INVMSO"), ParamValue.Value "", []) - // CvParam(("INVMSO:00000068","Study Protocol Type", "INVMSO"), ParamValue.Value "", []) - // CvParam(("INVMSO:00000069","Study Protocol Type Term Accession Number", "INVMSO"), ParamValue.Value "", []) - // CvParam(("INVMSO:00000070","Study Protocol Type Term Source REF", "INVMSO"), ParamValue.Value "", []) - // CvParam(("INVMSO:00000071","Study Protocol Description", "INVMSO"), ParamValue.Value "", []) - // CvParam(("INVMSO:00000072","Study Protocol URI", "INVMSO"), ParamValue.Value "", []) - // CvParam(("INVMSO:00000073","Study Protocol Version", "INVMSO"), ParamValue.Value "", []) - // CvParam(("INVMSO:00000074","Study Protocol Parameters Name", "INVMSO"), ParamValue.Value "", []) - // CvParam(("INVMSO:00000075","Study Protocol Parameters Term Accession Number", "INVMSO"), ParamValue.Value "", []) - // CvParam(("INVMSO:00000076","Study Protocol Parameters Term Source REF", "INVMSO"), ParamValue.Value "", []) - // CvParam(("INVMSO:00000077","Study Protocol Components Name", "INVMSO"), ParamValue.Value "", []) - // CvParam(("INVMSO:00000078","Study Protocol Components Type", "INVMSO"), ParamValue.Value "", []) - // CvParam(("INVMSO:00000079","Study Protocol Components Type Term Accession Number", "INVMSO"), ParamValue.Value "", []) - // CvParam(("INVMSO:00000080","Study Protocol Components Type Term Source REF", "INVMSO"), ParamValue.Value "", []) - // CvParam(("INVMSO:00000081","STUDY CONTACTS", "INVMSO"), ParamValue.Value "", []) - // CvParam(("INVMSO:00000082","Study Person Last Name", "INVMSO"), ParamValue.Value "", []) - // CvParam(("INVMSO:00000083","Study Person First Name", "INVMSO"), ParamValue.Value "", []) - // CvParam(("INVMSO:00000084","Study Person Mid Initials", "INVMSO"), ParamValue.Value "", []) - // CvParam(("INVMSO:00000085","Study Person Email", "INVMSO"), ParamValue.Value "", []) - // CvParam(("INVMSO:00000086","Study Person Phone", "INVMSO"), ParamValue.Value "", []) - // CvParam(("INVMSO:00000087","Study Person Fax", "INVMSO"), ParamValue.Value "", []) - // CvParam(("INVMSO:00000088","Study Person Address", "INVMSO"), ParamValue.Value "", []) - // CvParam(("INVMSO:00000089","Study Person Affiliation", "INVMSO"), ParamValue.Value "", []) - // CvParam(("INVMSO:00000090","Study Person Roles", "INVMSO"), ParamValue.Value "", []) - // CvParam(("INVMSO:00000091","Study Person Roles Term Accession Number", "INVMSO"), ParamValue.Value "", []) - // CvParam(("INVMSO:00000092","Study Person Roles Term Source REF", "INVMSO"), ParamValue.Value "", []) - //] - Terms.InvestigationMetadata.cvTerms - |> List.skip 1 //(ignore root term) - |> List.map (fun p -> CvParam(p, ParamValue.CvValue (CvTerm.create(accession = "AGMO:00000001", name = "Metadata Section Key", ref = "AGMO")), [])) + let parsedInvestigationMetadataEmpty = Investigation.parseMetadataSheetFromFile "Fixtures/incorrect/investigation_empty.xlsx" + let parsedInvestigationMetadataSimple = Investigation.parseMetadataSheetFromFile "Fixtures/correct/investigation_simple.xlsx" -[] -let ``First Param is CvParam`` () = - Assert.True (parsedInvestigationMetadataEmpty.Head |> Param.tryCvParam).IsSome + let allExpectedMetadataTermsEmpty = + Terms.InvestigationMetadata.cvTerms + |> List.skip 1 //(ignore root term) + |> List.filter (fun t -> not (t.Name.StartsWith("Comment"))) // ignore orcids + |> List.filter (fun t -> not (List.contains t Terms.InvestigationMetadata.obsoleteCvTerms)) // ignore obsolete terms + |> List.map (fun p -> CvParam(p, ParamValue.CvValue (CvTerm.create(accession = "AGMO:00000001", name = "Metadata Section Key", ref = "AGMO")), [])) -[] -let ``First CvParam`` () = CvParam.structuralEquality (parsedInvestigationMetadataEmpty.Head :?> CvParam) allExpectedMetadataTermsEmpty[0] + [] + let ``First Param is CvParam`` () = + Assert.True (parsedInvestigationMetadataEmpty.Head |> Param.tryCvParam).IsSome -[] -let ``Empty investigation is parsed with all structural ontology terms in order`` () = - Assert.All((List.zip allExpectedMetadataTermsEmpty parsedInvestigationMetadataEmpty), (fun (expected,actual) -> - CvParam.structuralEquality (expected) (actual :?> CvParam) - )) + [] + let ``First CvParam`` () = CvParam.structuralEquality (parsedInvestigationMetadataEmpty.Head :?> CvParam) allExpectedMetadataTermsEmpty[0] -let expectedTermValuesSimple = - [ - [""] - [""] - [""] - [""] - [""] - [""] - [""; "iid"] - [""; "ititle"] - [""; "idesc"] - [""] - [""] - [""] - [""] - [""] - [""] - [""] - [""] - [""] - [""] - [""] - [""; "Maus"; "Keider"; "müller"; ""; "oih"] - [""; "Oliver"; ""; "andreas";] - [""; "L. I."; "C."] - [""; "maus@nfdi4plants.org"] - [""] - [""] - [""] - [""; ""; "Affe"] - [""] - [""] - [""] - [""] - [""; "sid"] - [""; "stitle"] - [""; "sdesc"] - [""] - [""] - [""; @"sid\isa.study.xlsx"] - [""] - [""] - [""] - [""] - [""] - [""] - [""] - [""] - [""] - [""] - [""] - [""] - [""] - [""] - [""] - [""] - [""] - [""] - [""] - [""] - [""] - [""] - [""] - [""] - [""] - [""; @"aid\isa.assay.xlsx"; @"aid2\isa.assay.xlsx"] - [""] - [""] - [""] - [""] - [""] - [""] - [""] - [""] - [""] - [""] - [""] - [""] - [""] - [""] - [""] - [""] - [""; "weil"] - [""; ""; "lukas"] - [""] - [""] - [""] - [""] - [""] - [""] - [""] - [""] - [""] - ] + [] + let ``Empty investigation is parsed with all structural ontology terms in order`` () = + Assert.All((List.zip allExpectedMetadataTermsEmpty parsedInvestigationMetadataEmpty), (fun (expected,actual) -> + CvParam.structuralEquality (expected) (actual :?> CvParam) + )) -let allExpectedMetadataTermsFull = - Terms.InvestigationMetadata.cvTerms - |> List.skip 1 //(ignore root term) - |> List.zip expectedTermValuesSimple - |> List.map (fun (values,term) -> - values - |> List.mapi (fun i v -> - if i = 0 then - CvParam(term, ParamValue.CvValue (CvTerm.create(accession = "AGMO:00000001", name = "Metadata Section Key", ref = "AGMO")), []) - else - CvParam(term, ParamValue.Value v, []) + let expectedTermValuesSimple = + [ + [""] + [""] + [""] + [""] + [""] + [""] + [""; "iid"] + [""; "ititle"] + [""; "idesc"] + [""] + [""] + [""] + [""] + [""] + [""] + [""] + [""] + [""] + [""] + [""] + [""; "Maus"; "Keider"; "müller"; ""; "oih"] + [""; "Oliver"; ""; "andreas";] + [""; "L. I."; "C."] + [""; "maus@nfdi4plants.org"] + [""] + [""] + [""] + [""; ""; "Affe"] + [""] + [""] + [""] + [""] + [""; "sid"] + [""; "stitle"] + [""; "sdesc"] + [""] + [""] + [""; @"sid\isa.study.xlsx"] + [""] + [""] + [""] + [""] + [""] + [""] + [""] + [""] + [""] + [""] + [""] + [""] + [""] + [""] + [""] + [""] + [""] + [""] + [""] + [""] + [""] + [""] + [""] + [""] + [""] + [""; @"aid\isa.assay.xlsx"; @"aid2\isa.assay.xlsx"] + [""] + [""] + [""] + [""] + [""] + [""] + [""] + [""] + [""] + [""] + [""] + [""] + [""] + [""] + [""] + [""] + [""; "weil"] + [""; ""; "lukas"] + [""] + [""] + [""] + [""] + [""] + [""] + [""] + [""] + [""] + ] + + let allExpectedMetadataTermsFull = + Terms.InvestigationMetadata.cvTerms + |> List.skip 1 //(ignore root term) + |> List.filter (fun t -> not (t.Name.StartsWith("Comment"))) // ignore orcids + |> List.filter (fun t -> not (List.contains t Terms.InvestigationMetadata.obsoleteCvTerms)) // ignore obsolete terms + |> List.zip expectedTermValuesSimple + |> List.map (fun (values,term) -> + values + |> List.mapi (fun i v -> + if i = 0 then + CvParam(term, ParamValue.CvValue (CvTerm.create(accession = "AGMO:00000001", name = "Metadata Section Key", ref = "AGMO")), []) + else + CvParam(term, ParamValue.Value v, []) + ) ) - ) - |> List.concat + |> List.concat -[] -let ``Simple investigation is parsed with all structural ontology terms in order`` () = - Assert.All((List.zip allExpectedMetadataTermsFull parsedInvestigationMetadataSimple), (fun (expected,actual) -> - CvParam.structuralEquality (expected) (actual :?> CvParam) - )) \ No newline at end of file + [] + let ``Simple investigation is parsed with all structural ontology terms in order`` () = + Assert.All((List.zip allExpectedMetadataTermsFull parsedInvestigationMetadataSimple), (fun (expected,actual) -> + CvParam.structuralEquality (expected) (actual :?> CvParam) + )) \ No newline at end of file diff --git a/tests/ARCTokenization.Tests/IntegrationTests/StudyMetadata.fs b/tests/ARCTokenization.Tests/IntegrationTests/StudyMetadata.fs index 46d5517..7f7958b 100644 --- a/tests/ARCTokenization.Tests/IntegrationTests/StudyMetadata.fs +++ b/tests/ARCTokenization.Tests/IntegrationTests/StudyMetadata.fs @@ -1,2 +1,16 @@ -module StudyMetadata +namespace IntegrationTests +module StudyMetadata = + + open ControlledVocabulary + open FsSpreadsheet + open FsSpreadsheet.ExcelIO + open ARCTokenization + open Xunit + + open TestUtils + + let allExpectedMetadataTermsEmpty = + Terms.StudyMetadata.cvTerms + |> List.skip 1 //(ignore root term) + |> List.map (fun p -> CvParam(p, ParamValue.CvValue (CvTerm.create(accession = "AGMO:00000001", name = "Metadata Section Key", ref = "AGMO")), [])) \ No newline at end of file diff --git a/tests/ARCTokenization.Tests/StructuralOntologytests.fs b/tests/ARCTokenization.Tests/StructuralOntologytests.fs new file mode 100644 index 0000000..d61e0f4 --- /dev/null +++ b/tests/ARCTokenization.Tests/StructuralOntologytests.fs @@ -0,0 +1,54 @@ +namespace StructuralOntologyTests + +open FsOboParser +open ARCTokenization +open ARCTokenization.Terms +open Xunit + +module InvestigationMetadata = + + [] + let ``no duplicate term ids`` () = + let expected = [1 .. InvestigationMetadata.ontology.Terms.Length] + let actual = + InvestigationMetadata.ontology.Terms + |> List.map (fun t -> + t.Id.Replace("INVMSO:","") |> int + ) + |> List.sort + Assert.All( + List.zip expected actual, + (fun (e,a) -> Assert.Equal(e,a)) + ) + +module StudyMetadata = + + [] + let ``no duplicate term ids`` () = + let expected = [1 .. StudyMetadata.ontology.Terms.Length] + let actual = + StudyMetadata.ontology.Terms + |> List.map (fun t -> + t.Id.Replace("STDMSO:","") |> int + ) + |> List.sort + Assert.All( + List.zip expected actual, + (fun (e,a) -> Assert.Equal(e,a)) + ) + +module AssayMetadata = + + [] + let ``no duplicate term ids`` () = + let expected = [1 .. AssayMetadata.ontology.Terms.Length] + let actual = + AssayMetadata.ontology.Terms + |> List.map (fun t -> + t.Id.Replace("ASSMSO:","") |> int + ) + |> List.sort + Assert.All( + List.zip expected actual, + (fun (e,a) -> Assert.Equal(e,a)) + ) \ No newline at end of file