From 7353259d2acc40e209ae7f63f4442d69c324c0fb Mon Sep 17 00:00:00 2001
From: nbollis <nbollis@comcast.net>
Date: Thu, 19 Sep 2024 15:04:20 -0500
Subject: [PATCH 01/17] Added in base classes

---
 mzLib/Chemistry/ClassExtensions.cs            |   1 +
 .../Enums/DissociationType.cs                 |   5 +
 mzLib/MzLibUtil/ClassExtensions.cs            |   2 +-
 .../Fragmentation/FragmentationTerminus.cs    |  19 +-
 .../Oligo/DissociationTypeCollection.cs       |   3 +-
 .../Digestion/NucleolyticOligo.cs             | 168 +++++++++
 .../Digestion/OligoWithSetMods.cs             | 336 +++++++++++++++++
 .../Digestion/RnaDigestionParams.cs           |  51 +++
 mzLib/Transcriptomics/Digestion/Rnase.cs      |  60 ++-
 .../Interfaces/INucleicAcid.cs                |   2 +-
 mzLib/Transcriptomics/NucleicAcid.cs          | 356 ++++++++++++++++++
 mzLib/Transcriptomics/RNA.cs                  |  51 +++
 mzLib/mzLib.sln.DotSettings                   |   5 +-
 13 files changed, 1037 insertions(+), 22 deletions(-)
 create mode 100644 mzLib/Transcriptomics/Digestion/NucleolyticOligo.cs
 create mode 100644 mzLib/Transcriptomics/Digestion/OligoWithSetMods.cs
 create mode 100644 mzLib/Transcriptomics/Digestion/RnaDigestionParams.cs
 create mode 100644 mzLib/Transcriptomics/NucleicAcid.cs
 create mode 100644 mzLib/Transcriptomics/RNA.cs
diff --git a/mzLib/Chemistry/ClassExtensions.cs b/mzLib/Chemistry/ClassExtensions.cs
index 8bb5aecdc..7093e1f5f 100644
--- a/mzLib/Chemistry/ClassExtensions.cs
+++ b/mzLib/Chemistry/ClassExtensions.cs
@@ -48,6 +48,7 @@ public static double ToMass(this double massToChargeRatio, int charge)
             return Math.Abs(charge) * massToChargeRatio - charge * Constants.ProtonMass;
         }
 
+        public static double? RoundedDouble(this double myNumber, int places = 9) { return RoundedDouble((double?)myNumber, places); }
         public static double? RoundedDouble(this double? myNumber, int places = 9)
         {
             if (myNumber != null)
diff --git a/mzLib/MassSpectrometry/Enums/DissociationType.cs b/mzLib/MassSpectrometry/Enums/DissociationType.cs
index 1ac136197..ca738b3fa 100644
--- a/mzLib/MassSpectrometry/Enums/DissociationType.cs
+++ b/mzLib/MassSpectrometry/Enums/DissociationType.cs
@@ -109,6 +109,11 @@ public enum DissociationType
         /// </summary>
         LowCID,
 
+        /// <summary>
+        /// Activated ion electron photodetachment dissociation (aEPD)
+        /// </summary>
+        aEPD,
+
         Unknown,
         AnyActivationType,
         Custom,
diff --git a/mzLib/MzLibUtil/ClassExtensions.cs b/mzLib/MzLibUtil/ClassExtensions.cs
index 0129154a4..05e5cfd1e 100644
--- a/mzLib/MzLibUtil/ClassExtensions.cs
+++ b/mzLib/MzLibUtil/ClassExtensions.cs
@@ -19,6 +19,7 @@
 using System;
 using System.Collections.Generic;
 using System.Linq;
+using System.Text;
 using System.Text.RegularExpressions;
 
 namespace MzLibUtil
@@ -122,6 +123,5 @@ public static string GetPeriodTolerantFilenameWithoutExtension(this string fileP
         {
             return PeriodTolerantFilenameWithoutExtension.GetPeriodTolerantFilenameWithoutExtension(filePath);
         }
-
     }
 }
\ No newline at end of file
diff --git a/mzLib/Omics/Fragmentation/FragmentationTerminus.cs b/mzLib/Omics/Fragmentation/FragmentationTerminus.cs
index 146309caa..788041690 100644
--- a/mzLib/Omics/Fragmentation/FragmentationTerminus.cs
+++ b/mzLib/Omics/Fragmentation/FragmentationTerminus.cs
@@ -1,19 +1,12 @@
-﻿using System;
-using System.Collections.Generic;
-using System.Linq;
-using System.Text;
-using System.Threading.Tasks;
-
-namespace Omics.Fragmentation
+﻿namespace Omics.Fragmentation
 {
     public enum FragmentationTerminus
-        {
-            Both, //N- and C-terminus
-            N, //N-terminus only
-            C, //C-terminus only
+    {
+        Both, //N- and C-terminus
+        N, //N-terminus only
+        C, //C-terminus only
         None, //used for internal fragments, could be used for top down intact mass?
         FivePrime, // 5' for NucleicAcids
         ThreePrime, // 3' for NucleicAcids
-        }
-    
+    }
 }
diff --git a/mzLib/Omics/Fragmentation/Oligo/DissociationTypeCollection.cs b/mzLib/Omics/Fragmentation/Oligo/DissociationTypeCollection.cs
index d5b020160..b2b7cd891 100644
--- a/mzLib/Omics/Fragmentation/Oligo/DissociationTypeCollection.cs
+++ b/mzLib/Omics/Fragmentation/Oligo/DissociationTypeCollection.cs
@@ -1 +1,2 @@
-﻿using System;using System.Collections.Generic;using System.Linq;using System.Text;using System.Threading.Tasks;using Chemistry;using MassSpectrometry;namespace Omics.Fragmentation.Oligo{    /// <summary>    /// Methods dealing with specific product type for RNA molecules    /// </summary>    public static class DissociationTypeCollection    {        /// <summary>        /// Product Ion types by dissociation method        /// </summary>        private static readonly Dictionary<DissociationType, List<ProductType>> ProductsFromDissociationType =            new Dictionary<DissociationType, List<ProductType>>()            {                { DissociationType.Unknown, new List<ProductType>() },                {                    DissociationType.CID,                    new List<ProductType> { ProductType.aBaseLoss, ProductType.c, ProductType.dWaterLoss, ProductType.w, ProductType.y, ProductType.yWaterLoss, ProductType.M }                },                { DissociationType.LowCID, new List<ProductType>() { } },                { DissociationType.IRMPD, new List<ProductType>() { } },                { DissociationType.ECD, new List<ProductType> { } },                {                    DissociationType.PQD,                     new List<ProductType>                     {                         ProductType.a, ProductType.aBaseLoss, ProductType.b, ProductType.c, ProductType.d, ProductType.dWaterLoss,                                     ProductType.w, ProductType.x, ProductType.y, ProductType.yWaterLoss, ProductType.d, ProductType.M                    }                },                { DissociationType.ETD, new List<ProductType> { } },                {                    DissociationType.HCD,                    new List<ProductType> { ProductType.w, ProductType.y, ProductType.aBaseLoss, ProductType.dWaterLoss, ProductType.M }                },                { DissociationType.AnyActivationType, new List<ProductType> { } },      
          { DissociationType.EThcD, new List<ProductType> { } },                { DissociationType.Custom, new List<ProductType> { } },                { DissociationType.ISCID, new List<ProductType> { } }            };        /// <summary>        /// Returns list of products types based upon the dissociation type        /// </summary>        /// <param name="dissociationType"></param>        /// <returns></returns>        public static List<ProductType> GetRnaProductTypesFromDissociationType(this DissociationType dissociationType) =>            ProductsFromDissociationType[dissociationType];        /// <summary>        /// Mass to be added or subtracted        /// </summary>        private static readonly Dictionary<ProductType, ChemicalFormula> FragmentIonCaps =            new Dictionary<ProductType, ChemicalFormula>            {                { ProductType.a, ChemicalFormula.ParseFormula("H") },                { ProductType.aWaterLoss, ChemicalFormula.ParseFormula("H-1O-1") },                { ProductType.b, ChemicalFormula.ParseFormula("OH") },                { ProductType.bWaterLoss, ChemicalFormula.ParseFormula("H-1") },                { ProductType.c, ChemicalFormula.ParseFormula("O3H2P") },                { ProductType.cWaterLoss, ChemicalFormula.ParseFormula("O2P") },                { ProductType.d, ChemicalFormula.ParseFormula("O4H2P") },                { ProductType.dWaterLoss, ChemicalFormula.ParseFormula("O3P") },                { ProductType.w, ChemicalFormula.ParseFormula("H") },                { ProductType.wWaterLoss, ChemicalFormula.ParseFormula("H-1O-1") },                { ProductType.x, ChemicalFormula.ParseFormula("O-1H") },                { ProductType.xWaterLoss, ChemicalFormula.ParseFormula("O-2H-1") },                { ProductType.y, ChemicalFormula.ParseFormula("O-3P-1") },                { ProductType.yWaterLoss, ChemicalFormula.ParseFormula("O-4H-2P-1") },                { ProductType.z, 
ChemicalFormula.ParseFormula("O-4P-1") },                { ProductType.zWaterLoss, ChemicalFormula.ParseFormula("O-5H-2P-1") },                //fragment - Base chemical formula is the corresponding fragment chemical formula subtracing 1 H as H is lost when base is removed                { ProductType.aBaseLoss, ChemicalFormula.ParseFormula("H-2") }, // "H-1" -H                 { ProductType.bBaseLoss, ChemicalFormula.ParseFormula("O1H-2") }, //"OH1" -H                { ProductType.cBaseLoss, ChemicalFormula.ParseFormula("O3H-1P") }, //"O3P" -H                { ProductType.dBaseLoss, ChemicalFormula.ParseFormula("O4H-1P") }, //"O4H2P" -H                { ProductType.wBaseLoss, ChemicalFormula.ParseFormula("H-2") }, //"H"-H                { ProductType.xBaseLoss, ChemicalFormula.ParseFormula("O-1H-2") }, //"O-1H" -H                { ProductType.yBaseLoss, ChemicalFormula.ParseFormula("O-3H-2P-1") }, //"O-3P-1" -H                { ProductType.zBaseLoss, ChemicalFormula.ParseFormula("O-4H-3P-1") }, //"O-4H-1P-1" -1                { ProductType.M, new ChemicalFormula() }            };        /// <summary>        /// Returns mass shift by product type        /// </summary>        /// <param name="type"></param>        /// <returns></returns>        public static double GetRnaMassShiftFromProductType(this ProductType type) => FragmentIonCaps[type].MonoisotopicMass;        public static FragmentationTerminus GetRnaTerminusType(this ProductType fragmentType)        {            switch (fragmentType)            {                case ProductType.a:                case ProductType.aWaterLoss:                case ProductType.aBaseLoss:                case ProductType.b:                case ProductType.bWaterLoss:                case ProductType.bBaseLoss:                case ProductType.c:                case ProductType.cWaterLoss:                case ProductType.cBaseLoss:                case ProductType.d:                case 
ProductType.dWaterLoss:                case ProductType.dBaseLoss:                    return FragmentationTerminus.FivePrime;                case ProductType.w:                case ProductType.wWaterLoss:                case ProductType.wBaseLoss:                case ProductType.x:                case ProductType.xWaterLoss:                case ProductType.xBaseLoss:                case ProductType.y:                case ProductType.yWaterLoss:                case ProductType.yBaseLoss:                case ProductType.z:                case ProductType.zWaterLoss:                case ProductType.zBaseLoss:                    return FragmentationTerminus.ThreePrime;                case ProductType.M:                    return FragmentationTerminus.None;                case ProductType.aStar:                case ProductType.aDegree:                case ProductType.bAmmoniaLoss:                case ProductType.yAmmoniaLoss:                case ProductType.zPlusOne:                case ProductType.D:                case ProductType.Ycore:                case ProductType.Y:                default:                    throw new ArgumentOutOfRangeException(nameof(fragmentType), fragmentType, null);            }        }        /// <summary>        /// Product ion types by Fragmentation Terminus        /// </summary>        private static readonly Dictionary<FragmentationTerminus, List<ProductType>>            ProductIonTypesFromSpecifiedTerminus = new Dictionary<FragmentationTerminus, List<ProductType>>            {                {                    FragmentationTerminus.FivePrime, new List<ProductType>                    {                        ProductType.a, ProductType.aWaterLoss, ProductType.aBaseLoss,                        ProductType.b, ProductType.bWaterLoss, ProductType.bBaseLoss,                        ProductType.c, ProductType.cWaterLoss, ProductType.cBaseLoss,                        ProductType.d, 
ProductType.dWaterLoss, ProductType.dBaseLoss,                     }                },                {                    FragmentationTerminus.ThreePrime, new List<ProductType>                    {                        ProductType.w, ProductType.wWaterLoss, ProductType.wBaseLoss,                        ProductType.x, ProductType.xWaterLoss, ProductType.xBaseLoss,                        ProductType.y, ProductType.yWaterLoss, ProductType.yBaseLoss,                        ProductType.z, ProductType.zWaterLoss, ProductType.zBaseLoss,                    }                },                {                    FragmentationTerminus.Both, new List<ProductType>                    {                        ProductType.a, ProductType.aWaterLoss, ProductType.aBaseLoss,                        ProductType.b, ProductType.bWaterLoss, ProductType.bBaseLoss,                        ProductType.c, ProductType.cWaterLoss, ProductType.cBaseLoss,                        ProductType.d, ProductType.dWaterLoss, ProductType.dBaseLoss,                         ProductType.w, ProductType.wWaterLoss, ProductType.wBaseLoss,                        ProductType.x, ProductType.xWaterLoss, ProductType.xBaseLoss,                        ProductType.y, ProductType.yWaterLoss, ProductType.yBaseLoss,                        ProductType.z, ProductType.zWaterLoss, ProductType.zBaseLoss,                        ProductType.M                    }                }            };        public static List<ProductType> GetRnaTerminusSpecificProductTypes(            this FragmentationTerminus fragmentationTerminus)        {            return ProductIonTypesFromSpecifiedTerminus[fragmentationTerminus];        }        /// <summary>        /// Returns all product ion types based upon specified terminus        /// </summary>        /// <param name="dissociationType"></param>        /// <param name="fragmentationTerminus"></param>        /// <returns></returns>        public 
static List<ProductType> GetRnaTerminusSpecificProductTypesFromDissociation(            this DissociationType dissociationType, FragmentationTerminus fragmentationTerminus)        {            var terminusSpecific = fragmentationTerminus.GetRnaTerminusSpecificProductTypes();            var dissociationSpecific = dissociationType.GetRnaProductTypesFromDissociationType();            return terminusSpecific.Intersect(dissociationSpecific).ToList();        }    }}
\ No newline at end of file
+﻿using Chemistry;using MassSpectrometry;namespace Omics.Fragmentation.Oligo{    /// <summary>    /// Methods dealing with specific product type for RNA molecules    /// </summary>    public static class DissociationTypeCollection    {
+        /// <summary>        /// Product Ion types by dissociation method        /// </summary>        /// <remarks>        /// HCD ions were taken from the following paper: https://www.nature.com/articles/s41598-023-36193-2        /// Ion types below here should be validated with experimental results.        /// Base and water losses occur very frequently and may also be present in these activation types.        /// CID, UVPD, and aEPD ions were taken from the following paper: https://pubs.acs.org/doi/10.1021/acs.analchem.3c05428?ref=PDF        /// NETD ions were taken from the following paper: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7161943/        /// lowCID ions were taken from this Thermo Poster: https://assets.thermofisher.com/TFS-Assets/CMD/Flyers/fl-489263-asms23-optimized-fragmentation-oligonucleotides-suppresses-undesired-fragmentation-fl489263-en.pdf        /// </remarks>        public static Dictionary<DissociationType, List<ProductType>> ProductsFromDissociationType =            new Dictionary<DissociationType, List<ProductType>>()            {                { DissociationType.Unknown, new List<ProductType>() },                { DissociationType.Custom, new List<ProductType>() },                {                    DissociationType.AnyActivationType, new List<ProductType>                    {                        ProductType.a, ProductType.aBaseLoss, ProductType.aWaterLoss,                        ProductType.b, ProductType.bBaseLoss, ProductType.bWaterLoss,                        ProductType.c, ProductType.cBaseLoss, ProductType.cWaterLoss,                        ProductType.d, ProductType.dBaseLoss, ProductType.dWaterLoss,                        ProductType.w, ProductType.wBaseLoss, ProductType.wWaterLoss,                        ProductType.x, ProductType.xBaseLoss, ProductType.xWaterLoss,                        ProductType.y, ProductType.yBaseLoss, ProductType.yWaterLoss,                        ProductType.z, 
ProductType.zBaseLoss, ProductType.zWaterLoss,                        ProductType.M                    }                },                {                    DissociationType.CID, new List<ProductType>                    {                        ProductType.a, ProductType.aBaseLoss, ProductType.c, ProductType.dWaterLoss, ProductType.w,                        ProductType.y, ProductType.yWaterLoss, ProductType.M                    }                },                {                    DissociationType.HCD, new List<ProductType>                    {                        ProductType.a, ProductType.aBaseLoss, ProductType.b, ProductType.c, ProductType.d,                        ProductType.dWaterLoss, ProductType.w, ProductType.x, ProductType.y, ProductType.z,                        ProductType.M                    }                },                {                    DissociationType.UVPD, new List<ProductType>                    {                        ProductType.a, ProductType.c, ProductType.d, ProductType.w, ProductType.M                    }                },                {                    DissociationType.aEPD, new List<ProductType>                    {                        ProductType.a, ProductType.c, ProductType.d, ProductType.w, ProductType.x, ProductType.z, ProductType.M                    }                },                {                    DissociationType.NETD, new List<ProductType>                    {                        ProductType.w, ProductType.d, ProductType.M                    }                },                {                    DissociationType.LowCID, new List<ProductType>()                    {                        ProductType.aBaseLoss, ProductType.c, ProductType.dWaterLoss, ProductType.w,                        ProductType.y, ProductType.yWaterLoss, ProductType.M                    }                },                { DissociationType.IRMPD, new List<ProductType>() { } },     
           { DissociationType.ECD, new List<ProductType> { } },                { DissociationType.PQD, new List<ProductType> { } },                { DissociationType.ETD, new List<ProductType> { } },                { DissociationType.EThcD, new List<ProductType> { } },            };        /// <summary>        /// Returns list of products types based upon the dissociation type        /// </summary>        /// <param name="dissociationType"></param>        /// <returns></returns>        public static List<ProductType> GetRnaProductTypesFromDissociationType(this DissociationType dissociationType) =>            ProductsFromDissociationType[dissociationType];        /// <summary>        /// Mass to be added or subtracted        /// </summary>        private static readonly Dictionary<ProductType, ChemicalFormula> FragmentIonCaps =            new Dictionary<ProductType, ChemicalFormula>            {                { ProductType.a, ChemicalFormula.ParseFormula("H") },                { ProductType.aWaterLoss, ChemicalFormula.ParseFormula("H-1O-1") },                { ProductType.b, ChemicalFormula.ParseFormula("OH") },                { ProductType.bWaterLoss, ChemicalFormula.ParseFormula("H-1") },                { ProductType.c, ChemicalFormula.ParseFormula("O3H2P") },                { ProductType.cWaterLoss, ChemicalFormula.ParseFormula("O2P") },                { ProductType.d, ChemicalFormula.ParseFormula("O4H2P") },                { ProductType.dWaterLoss, ChemicalFormula.ParseFormula("O3P") },                { ProductType.w, ChemicalFormula.ParseFormula("H") },                { ProductType.wWaterLoss, ChemicalFormula.ParseFormula("H-1O-1") },                { ProductType.x, ChemicalFormula.ParseFormula("O-1H") },                { ProductType.xWaterLoss, ChemicalFormula.ParseFormula("O-2H-1") },                { ProductType.y, ChemicalFormula.ParseFormula("O-3P-1") },                { ProductType.yWaterLoss, 
ChemicalFormula.ParseFormula("O-4H-2P-1") },                { ProductType.z, ChemicalFormula.ParseFormula("O-4P-1") },                { ProductType.zWaterLoss, ChemicalFormula.ParseFormula("O-5H-2P-1") },                //fragment - Base chemical formula is the corresponding fragment chemical formula subtracing 1 H as H is lost when base is removed                { ProductType.aBaseLoss, ChemicalFormula.ParseFormula("H-2") }, // "H-1" -H                 { ProductType.bBaseLoss, ChemicalFormula.ParseFormula("O1H-2") }, //"OH1" -H                { ProductType.cBaseLoss, ChemicalFormula.ParseFormula("O3H-1P") }, //"O3P" -H                { ProductType.dBaseLoss, ChemicalFormula.ParseFormula("O4H-1P") }, //"O4H2P" -H                { ProductType.wBaseLoss, ChemicalFormula.ParseFormula("H-2") }, //"H"-H                { ProductType.xBaseLoss, ChemicalFormula.ParseFormula("O-1H-2") }, //"O-1H" -H                { ProductType.yBaseLoss, ChemicalFormula.ParseFormula("O-3H-2P-1") }, //"O-3P-1" -H                { ProductType.zBaseLoss, ChemicalFormula.ParseFormula("O-4H-3P-1") }, //"O-4H-1P-1" -1                { ProductType.M, new ChemicalFormula() }            };        /// <summary>        /// Returns mass shift by product type        /// </summary>        /// <param name="type"></param>        /// <returns></returns>        public static double GetRnaMassShiftFromProductType(this ProductType type) => FragmentIonCaps[type].MonoisotopicMass;        public static FragmentationTerminus GetRnaTerminusType(this ProductType fragmentType)        {            switch (fragmentType)            {                case ProductType.a:                case ProductType.aWaterLoss:                case ProductType.aBaseLoss:                case ProductType.b:                case ProductType.bWaterLoss:                case ProductType.bBaseLoss:                case ProductType.c:                case ProductType.cWaterLoss:                case 
ProductType.cBaseLoss:                case ProductType.d:                case ProductType.dWaterLoss:                case ProductType.dBaseLoss:                    return FragmentationTerminus.FivePrime;                case ProductType.w:                case ProductType.wWaterLoss:                case ProductType.wBaseLoss:                case ProductType.x:                case ProductType.xWaterLoss:                case ProductType.xBaseLoss:                case ProductType.y:                case ProductType.yWaterLoss:                case ProductType.yBaseLoss:                case ProductType.z:                case ProductType.zWaterLoss:                case ProductType.zBaseLoss:                    return FragmentationTerminus.ThreePrime;                case ProductType.M:                    return FragmentationTerminus.None;                case ProductType.aStar:                case ProductType.aDegree:                case ProductType.bAmmoniaLoss:                case ProductType.yAmmoniaLoss:                case ProductType.zPlusOne:                case ProductType.D:                case ProductType.Ycore:                case ProductType.Y:                default:                    throw new ArgumentOutOfRangeException(nameof(fragmentType), fragmentType, null);            }        }        /// <summary>        /// Product ion types by Fragmentation Terminus        /// </summary>        private static readonly Dictionary<FragmentationTerminus, List<ProductType>>            ProductIonTypesFromSpecifiedTerminus = new Dictionary<FragmentationTerminus, List<ProductType>>            {                {                    FragmentationTerminus.FivePrime, new List<ProductType>                    {                        ProductType.a, ProductType.aWaterLoss, ProductType.aBaseLoss,                        ProductType.b, ProductType.bWaterLoss, ProductType.bBaseLoss,                        ProductType.c, 
ProductType.cWaterLoss, ProductType.cBaseLoss,                        ProductType.d, ProductType.dWaterLoss, ProductType.dBaseLoss,                     }                },                {                    FragmentationTerminus.ThreePrime, new List<ProductType>                    {                        ProductType.w, ProductType.wWaterLoss, ProductType.wBaseLoss,                        ProductType.x, ProductType.xWaterLoss, ProductType.xBaseLoss,                        ProductType.y, ProductType.yWaterLoss, ProductType.yBaseLoss,                        ProductType.z, ProductType.zWaterLoss, ProductType.zBaseLoss,                    }                },                {                    FragmentationTerminus.Both, new List<ProductType>                    {                        ProductType.a, ProductType.aWaterLoss, ProductType.aBaseLoss,                        ProductType.b, ProductType.bWaterLoss, ProductType.bBaseLoss,                        ProductType.c, ProductType.cWaterLoss, ProductType.cBaseLoss,                        ProductType.d, ProductType.dWaterLoss, ProductType.dBaseLoss,                         ProductType.w, ProductType.wWaterLoss, ProductType.wBaseLoss,                        ProductType.x, ProductType.xWaterLoss, ProductType.xBaseLoss,                        ProductType.y, ProductType.yWaterLoss, ProductType.yBaseLoss,                        ProductType.z, ProductType.zWaterLoss, ProductType.zBaseLoss,                        ProductType.M                    }                }            };        public static List<ProductType> GetRnaTerminusSpecificProductTypes(            this FragmentationTerminus fragmentationTerminus)        {            return ProductIonTypesFromSpecifiedTerminus[fragmentationTerminus];        }        /// <summary>        /// Returns all product ion types based upon specified terminus        /// </summary>        /// <param name="dissociationType"></param>        /// <param 
name="fragmentationTerminus"></param>        /// <returns></returns>        public static List<ProductType> GetRnaTerminusSpecificProductTypesFromDissociation(            this DissociationType dissociationType, FragmentationTerminus fragmentationTerminus)        {            var terminusSpecific = fragmentationTerminus.GetRnaTerminusSpecificProductTypes();            var dissociationSpecific = dissociationType.GetRnaProductTypesFromDissociationType();            return terminusSpecific.Intersect(dissociationSpecific).ToList();        }    }}
\ No newline at end of file
diff --git a/mzLib/Transcriptomics/Digestion/NucleolyticOligo.cs b/mzLib/Transcriptomics/Digestion/NucleolyticOligo.cs
new file mode 100644
index 000000000..a741638c5
--- /dev/null
+++ b/mzLib/Transcriptomics/Digestion/NucleolyticOligo.cs
@@ -0,0 +1,168 @@
+﻿using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+using Chemistry;
+using Omics.Digestion;
+using Omics.Modifications;
+
+namespace Transcriptomics.Digestion
+{
+    public class NucleolyticOligo : DigestionProduct
+    {
+        protected IHasChemicalFormula _fivePrimeTerminus;
+        protected IHasChemicalFormula _threePrimeTerminus;
+
+        internal NucleolyticOligo(NucleicAcid nucleicAcid, int oneBaseStartResidue, int oneBasedEndResidue, int missedCleavages,
+            CleavageSpecificity cleavageSpecificity, IHasChemicalFormula? fivePrimeTerminus, IHasChemicalFormula? threePrimeTerminus)
+            : base(nucleicAcid, oneBaseStartResidue, oneBasedEndResidue, missedCleavages, cleavageSpecificity)
+        {
+            // A terminus passed as null is replaced by the corresponding NucleicAcid default.
+            (_fivePrimeTerminus, _threePrimeTerminus) = (fivePrimeTerminus ?? NucleicAcid.DefaultFivePrimeTerminus,
+                threePrimeTerminus ?? NucleicAcid.DefaultThreePrimeTerminus);
+        }
+
+        /// <summary>
+        /// The parent nucleic acid that this oligo was digested from (the Parent, viewed as a NucleicAcid)
+        /// </summary>
+        public NucleicAcid NucleicAcid
+        {
+            get { return Parent as NucleicAcid; }
+            protected set { Parent = value; }
+        }
+
+        /// <summary>
+        /// The string form of an oligo is its base sequence
+        /// </summary>
+        public override string ToString() => BaseSequence;
+
+        internal IEnumerable<OligoWithSetMods> GetModifiedOligos(IEnumerable<Modification> allKnownFixedMods,
+            RnaDigestionParams digestionParams, List<Modification> variableModifications)
+        { // Lazily yields the modified forms of this oligo: one per variable-mod pattern, each topped up with fixed mods
+            int oligoLength = OneBasedEndResidue - OneBasedStartResidue + 1;
+            int maximumVariableModificationIsoforms = digestionParams.MaxModificationIsoforms;
+            int maxModsForOligo = digestionParams.MaxMods;
+            var twoBasedPossibleVariableAndLocalizeableModifications = new Dictionary<int, List<Modification>>(oligoLength + 4);
+            // Keys: 1 = 5' terminus, r + 2 = residue at zero-based oligo index r, oligoLength + 2 = 3' terminus
+            var fivePrimeVariableMods = new List<Modification>();
+            twoBasedPossibleVariableAndLocalizeableModifications.Add(1, fivePrimeVariableMods);
+
+            var threePrimeVariableMods = new List<Modification>();
+            twoBasedPossibleVariableAndLocalizeableModifications.Add(oligoLength + 2, threePrimeVariableMods);
+            // NOTE(review): UniprotModExists below receives oligo-relative positions (1, r + 1, oligoLength) while ModFits receives the parent-relative OneBasedStartResidue + r - confirm which one it expects
+            // collect all possible variable mods, skipping if there is a database annotated modification
+            foreach (Modification variableModification in variableModifications)
+            {
+                // Check if it can be a 5'-terminal mod
+                if (CanBeFivePrime(variableModification, oligoLength) && !ModificationLocalization.UniprotModExists(NucleicAcid, 1, variableModification))
+                {
+                    fivePrimeVariableMods.Add(variableModification);
+                }
+
+                for (int r = 0; r < oligoLength; r++)
+                {
+                    if (variableModification.LocationRestriction == "Anywhere." &&
+                        ModificationLocalization.ModFits(variableModification, NucleicAcid.BaseSequence, r + 1, oligoLength, OneBasedStartResidue + r)
+                         && !ModificationLocalization.UniprotModExists(NucleicAcid, r + 1, variableModification))
+                    {
+                        if (!twoBasedPossibleVariableAndLocalizeableModifications.TryGetValue(r + 2, out List<Modification> residueVariableMods))
+                        {
+                            residueVariableMods = new List<Modification> { variableModification };
+                            twoBasedPossibleVariableAndLocalizeableModifications.Add(r + 2, residueVariableMods);
+                        }
+                        else
+                        {
+                            residueVariableMods.Add(variableModification);
+                        }
+                    }
+                }
+                // Check if it can be a 3'-terminal mod
+                if (CanBeThreePrime(variableModification, oligoLength) && !ModificationLocalization.UniprotModExists(NucleicAcid, oligoLength, variableModification))
+                {
+                    threePrimeVariableMods.Add(variableModification);
+                }
+            }
+
+            // LOCALIZED MODS: modifications annotated on the parent nucleic acid whose position falls inside this oligo
+            foreach (var kvp in NucleicAcid.OneBasedPossibleLocalizedModifications)
+            {
+                bool inBounds = kvp.Key >= OneBasedStartResidue && kvp.Key <= OneBasedEndResidue;
+                if (!inBounds)
+                {
+                    continue;
+                }
+
+                int locInPeptide = kvp.Key - OneBasedStartResidue + 1; // one-based position within this oligo
+                foreach (Modification modWithMass in kvp.Value)
+                {
+                    if (modWithMass is Modification variableModification)
+                    {
+                        // Check if it can be a 5'-terminal mod (never added for decoys)
+                        if (locInPeptide == 1 && CanBeFivePrime(variableModification, oligoLength) && !NucleicAcid.IsDecoy)
+                        {
+                            fivePrimeVariableMods.Add(variableModification);
+                        }
+
+                        int r = locInPeptide - 1;
+                        if (r >= 0 && r < oligoLength
+                            && (NucleicAcid.IsDecoy ||
+                            (ModificationLocalization.ModFits(variableModification, NucleicAcid.BaseSequence, r + 1, oligoLength, OneBasedStartResidue + r)
+                             && variableModification.LocationRestriction == "Anywhere.")))
+                        {
+                            if (!twoBasedPossibleVariableAndLocalizeableModifications.TryGetValue(r + 2, out List<Modification> residueVariableMods))
+                            {
+                                residueVariableMods = new List<Modification> { variableModification };
+                                twoBasedPossibleVariableAndLocalizeableModifications.Add(r + 2, residueVariableMods);
+                            }
+                            else
+                            {
+                                residueVariableMods.Add(variableModification);
+                            }
+                        }
+
+                        // Check if it can be a 3'-terminal mod (never added for decoys)
+                        if (locInPeptide == oligoLength && CanBeThreePrime(variableModification, oligoLength) && !NucleicAcid.IsDecoy)
+                        {
+                            threePrimeVariableMods.Add(variableModification);
+                        }
+                    }
+                }
+            }
+
+            int variable_modification_isoforms = 0;
+            // Each pattern is topped up with fixed mods at the keys it left empty, emitted, and counted against MaxModificationIsoforms
+            foreach (Dictionary<int, Modification> kvp in GetVariableModificationPatterns(twoBasedPossibleVariableAndLocalizeableModifications, maxModsForOligo, oligoLength))
+            {
+                int numFixedMods = 0;
+                foreach (var ok in GetFixedModsOneIsNorFivePrimeTerminus(oligoLength, allKnownFixedMods))
+                {
+                    if (!kvp.ContainsKey(ok.Key))
+                    {
+                        numFixedMods++;
+                        kvp.Add(ok.Key, ok.Value);
+                    }
+                }
+                yield return new OligoWithSetMods(NucleicAcid, digestionParams, OneBasedStartResidue, OneBasedEndResidue, MissedCleavages,
+                    CleavageSpecificityForFdrCategory, kvp, numFixedMods, _fivePrimeTerminus, _threePrimeTerminus);
+                variable_modification_isoforms++;
+                if (variable_modification_isoforms == maximumVariableModificationIsoforms)
+                {
+                    yield break;
+                }
+            }
+        }
+
+        // A mod can sit on the 5' end when its restriction names a 5' terminus and ModFits accepts it at oligo position 1.
+        private bool CanBeFivePrime(Modification variableModification, int peptideLength) =>
+            (variableModification.LocationRestriction is "5'-terminal." or "Oligo 5'-terminal.")
+            && ModificationLocalization.ModFits(variableModification, NucleicAcid.BaseSequence, 1,
+                peptideLength, OneBasedStartResidue);
+
+        // A mod can sit on the 3' end when its restriction names a 3' terminus and ModFits accepts it at the last oligo position.
+        private bool CanBeThreePrime(Modification variableModification, int peptideLength) =>
+            (variableModification.LocationRestriction is "3'-terminal." or "Oligo 3'-terminal.")
+            && ModificationLocalization.ModFits(variableModification, NucleicAcid.BaseSequence, peptideLength,
+                peptideLength, OneBasedStartResidue + peptideLength - 1);
+    }
+}
diff --git a/mzLib/Transcriptomics/Digestion/OligoWithSetMods.cs b/mzLib/Transcriptomics/Digestion/OligoWithSetMods.cs
new file mode 100644
index 000000000..92b5e501c
--- /dev/null
+++ b/mzLib/Transcriptomics/Digestion/OligoWithSetMods.cs
@@ -0,0 +1,336 @@
+﻿using Chemistry;
+using MassSpectrometry;
+using Omics.Digestion;
+using Omics.Fragmentation;
+using Omics.Modifications;
+using Omics;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Security.Cryptography;
+using System.Text;
+using System.Threading.Tasks;
+using Easy.Common.Extensions;
+using Omics.Fragmentation.Oligo;
+
+namespace Transcriptomics.Digestion
+{
+    public class OligoWithSetMods : NucleolyticOligo, IBioPolymerWithSetMods, INucleicAcid
+    {
+        public OligoWithSetMods(NucleicAcid nucleicAcid, RnaDigestionParams digestionParams, int oneBaseStartResidue,
+            int oneBasedEndResidue, int missedCleavages, CleavageSpecificity cleavageSpecificity,
+            Dictionary<int, Modification> allModsOneIsNTerminus, int numFixedMods,
+            IHasChemicalFormula? fivePrimeTerminus = null, IHasChemicalFormula? threePrimeTerminus = null)
+            : base(nucleicAcid, oneBaseStartResidue, oneBasedEndResidue, missedCleavages, cleavageSpecificity, fivePrimeTerminus, threePrimeTerminus)
+        {
+            NumFixedMods = numFixedMods;
+            _digestionParams = digestionParams;
+            _allModsOneIsNterminus = allModsOneIsNTerminus;
+            // Computed last: DetermineFullSequence() is invoked only after the modification dictionary has been stored.
+            FullSequence = this.DetermineFullSequence();
+        }
+
+        public OligoWithSetMods(string sequence, Dictionary<string, Modification> allKnownMods, int numFixedMods = 0,
+            RnaDigestionParams digestionParams = null, NucleicAcid n = null, int oneBaseStartResidue = 1, int oneBasedEndResidue = 0,
+            int missedCleavages = 0, CleavageSpecificity cleavageSpecificity = CleavageSpecificity.Full, string description = null,
+            IHasChemicalFormula? fivePrimeTerminus = null, IHasChemicalFormula? threePrimeTerminus = null)
+            : base(n, oneBaseStartResidue, oneBasedEndResidue, missedCleavages, cleavageSpecificity, fivePrimeTerminus, threePrimeTerminus)
+        {
+            // A '|' marks an ambiguous sequence, which this constructor refuses to parse.
+            if (sequence.Contains("|"))
+                throw new MzLibUtil.MzLibException($"Ambiguous oligo cannot be parsed from string: {sequence}");
+
+            NumFixedMods = numFixedMods;
+            _digestionParams = digestionParams;
+
+            // Order matters: the mod parser reads FullSequence and BaseSequence, so both are assigned first.
+            FullSequence = sequence;
+            _baseSequence = IBioPolymerWithSetMods.GetBaseSequenceFromFullSequence(sequence);
+            GetModsAfterDeserialization(allKnownMods);
+
+            if (n != null)
+                Parent = n;
+        }
+
+        private RnaDigestionParams _digestionParams; // digestion settings supplied at construction; the string constructor defaults this to null
+        private Dictionary<int, Modification> _allModsOneIsNterminus; // position key -> modification carried by this oligo
+        private double? _monoisotopicMass; // cache for MonoisotopicMass; null until first read or after a terminus change
+        private ChemicalFormula? _thisChemicalFormula; // cache for ThisChemicalFormula; reset when a terminus changes
+        private double? _mostAbundantMonoisotopicMass; // cache for MostAbundantMonoisotopicMass; reset when a terminus changes
+        private IDictionary<int, List<Modification>>? _oneBasedPossibleLocalizedModifications; // built on demand from _allModsOneIsNterminus
+
+        public string FullSequence { get; private set; } // sequence string including the bracketed modification annotations
+        public IDigestionParams DigestionParams => _digestionParams;
+        /// <summary>5' terminal group; assigning it discards the cached masses and chemical formula.</summary>
+        public IHasChemicalFormula FivePrimeTerminus
+        {
+            get { return _fivePrimeTerminus; }
+            set
+            {
+                _fivePrimeTerminus = value;
+                _thisChemicalFormula = null;
+                _monoisotopicMass = _mostAbundantMonoisotopicMass = null;
+            }
+        }
+
+        /// <summary>3' terminal group; assigning it discards the cached masses and chemical formula.</summary>
+        public IHasChemicalFormula ThreePrimeTerminus
+        {
+            get { return _threePrimeTerminus; }
+            set
+            {
+                _threePrimeTerminus = value;
+                _thisChemicalFormula = null;
+                _monoisotopicMass = _mostAbundantMonoisotopicMass = null;
+            }
+        }
+
+        /// <summary>
+        /// Monoisotopic mass of the oligo: the summed nucleotide residue masses, plus every modification mass,
+        /// plus both terminal groups. Computed on first access and cached until a terminus is reassigned.
+        /// </summary>
+        public double MonoisotopicMass
+        {
+            get
+            {
+                return _monoisotopicMass ??= BaseSequence.Sum(nuc => Nucleotide.GetResidue(nuc).MonoisotopicMass)
+                                             + AllModsOneIsNterminus.Values.Sum(mod => mod.MonoisotopicMass.Value)
+                                             + FivePrimeTerminus.MonoisotopicMass
+                                             + ThreePrimeTerminus.MonoisotopicMass;
+            }
+        }
+
+        /// <summary>
+        /// Chemical formula of the oligo: the formula of an RNA built from the base sequence and both termini,
+        /// with each modification's formula added. Yields null when any modification has no chemical formula.
+        /// </summary>
+        public ChemicalFormula ThisChemicalFormula
+        {
+            get
+            {
+                if (_thisChemicalFormula is not null)
+                    return _thisChemicalFormula;
+
+                var composed = new RNA(BaseSequence, FivePrimeTerminus, ThreePrimeTerminus).GetChemicalFormula();
+                foreach (var modFormula in AllModsOneIsNterminus.Values.Select(mod => mod.ChemicalFormula))
+                {
+                    if (modFormula is null) { composed = null; break; }
+                    composed.Add(modFormula);
+                }
+
+                return (_thisChemicalFormula = composed)!;
+            }
+        }
+
+        /// <summary>
+        /// Mass at the most intense peak of the isotopic distribution of ThisChemicalFormula, rounded; cached after first use.
+        /// </summary>
+        public double MostAbundantMonoisotopicMass
+        {
+            get
+            {
+                if (_mostAbundantMonoisotopicMass.HasValue) return _mostAbundantMonoisotopicMass.Value;
+                var isotopes = IsotopicDistribution.GetDistribution(ThisChemicalFormula);
+                var apexIndex = isotopes.Intensities.IndexOf(isotopes.Intensities.Max());
+                return (_mostAbundantMonoisotopicMass = isotopes.Masses[apexIndex].RoundedDouble()).Value;
+            }
+        }
+
+        public string SequenceWithChemicalFormulas => throw new NotImplementedException(); // not implemented: every read throws
+        // Position key -> modification map for this oligo (the dictionary handed to, or parsed by, the constructor)
+        public Dictionary<int, Modification> AllModsOneIsNterminus => _allModsOneIsNterminus;
+        // The same map with each modification wrapped in a single-element list; built once on first access
+        public IDictionary<int, List<Modification>> OneBasedPossibleLocalizedModifications => _oneBasedPossibleLocalizedModifications ??=
+            _allModsOneIsNterminus.ToDictionary(p => p.Key, p => new List<Modification>() { p.Value });
+        public int NumMods => AllModsOneIsNterminus.Count;
+        public int NumFixedMods { get; }
+        public int NumVariableMods => NumMods - NumFixedMods;
+
+        /// <summary>
+        /// Generates theoretical fragments of this oligo for the given dissociation type.
+        /// The "products" list is cleared and then filled with these fragments; the intact ion (ProductType.M)
+        /// is included when the requested terminus is Both or None.
+        /// </summary>
+        public void Fragment(DissociationType dissociationType, FragmentationTerminus fragmentationTerminus,
+            List<Product> products)
+        {
+            products.Clear();
+
+            // Both ion-type lists are resolved up front, whichever termini were requested.
+            var fivePrimeTypes = dissociationType.GetRnaTerminusSpecificProductTypesFromDissociation(FragmentationTerminus.FivePrime);
+            var threePrimeTypes = dissociationType.GetRnaTerminusSpecificProductTypesFromDissociation(FragmentationTerminus.ThreePrime);
+
+            bool wantsBoth = fragmentationTerminus == FragmentationTerminus.Both;
+            bool wantsFivePrime = wantsBoth || fragmentationTerminus == FragmentationTerminus.FivePrime;
+            bool wantsThreePrime = wantsBoth || fragmentationTerminus == FragmentationTerminus.ThreePrime;
+
+            // Nucleotides of the parent that this oligo covers
+            var residues = (Parent as NucleicAcid)!.NucleicAcidArray[(OneBasedStartResidue - 1)..OneBasedEndResidue];
+
+            // intact product ion
+            if (wantsBoth || fragmentationTerminus == FragmentationTerminus.None)
+            {
+                products.AddRange(GetNeutralFragments(ProductType.M, residues));
+            }
+
+            if (wantsFivePrime)
+                products.AddRange(fivePrimeTypes.SelectMany(type => GetNeutralFragments(type, residues)));
+
+            if (wantsThreePrime)
+                products.AddRange(threePrimeTypes.SelectMany(type => GetNeutralFragments(type, residues)));
+        }
+
+        /// <summary>
+        /// Intended to generate theoretical internal fragments of this oligo for the given dissociation type,
+        /// filling the "products" parameter with them.
+        /// The "minLengthOfFragments" parameter is the minimum number of nucleotides for an internal fragment to be included.
+        /// Not implemented: every call throws.
+        /// </summary>
+        /// <exception cref="NotImplementedException">Always thrown</exception>
+        public void FragmentInternally(DissociationType dissociationType, int minLengthOfFragments,
+            List<Product> products)
+            => throw new NotImplementedException();
+
+        /// <summary>
+        /// Calculates all the neutral fragments of the product type you specify
+        /// </summary>
+        /// <remarks>
+        /// ProductType.M yields a single product carrying the intact monoisotopic mass. Any other type yields one
+        /// product per fragment number from 1 to Length - 1, accumulating nucleotide and modification masses
+        /// starting at the terminus that the product type belongs to.
+        /// </remarks>
+        /// <param name="type">product type to get neutral fragments from</param>
+        /// <param name="sequence">Sequence to generate fragments from, will be calculated from the parent if left null</param>
+        /// <returns>The fragments, produced lazily in order of increasing fragment number</returns>
+        public IEnumerable<Product> GetNeutralFragments(ProductType type, Nucleotide[]? sequence = null)
+        {
+            sequence ??= (Parent as NucleicAcid)!.NucleicAcidArray[(OneBasedStartResidue - 1)..OneBasedEndResidue];
+
+            if (type is ProductType.M)
+            {
+                yield return new Product(type, FragmentationTerminus.None, MonoisotopicMass, 0, 0, 0);
+                yield break;
+            }
+
+            // determine mass of piece remaining after fragmentation
+            double monoMass = type.GetRnaMassShiftFromProductType();
+
+            // determine mass of terminal cap and add to fragment
+            bool isThreePrimeTerminal = type.GetRnaTerminusType() == FragmentationTerminus.ThreePrime;
+            IHasChemicalFormula terminus = isThreePrimeTerminal ? ThreePrimeTerminus : FivePrimeTerminus;
+            monoMass += terminus.MonoisotopicMass;
+
+            // These depend only on the product type, so they are resolved once instead of on every iteration
+            bool isBaseLoss = type.ToString().Contains("Base");
+            FragmentationTerminus fragmentTerminus =
+                isThreePrimeTerminal ? FragmentationTerminus.ThreePrime : FragmentationTerminus.FivePrime;
+            int oligoLength = BaseSequence.Length;
+
+            // Fragment numbers run from 1 to oligoLength - 1; the intact oligo is only reported as ProductType.M
+            // NOTE(review): the lookup below uses keys naIndex + 2 only, so keys 1 and Length + 2 of the
+            // modification dictionary (the terminal positions) are never added to a fragment mass -
+            // confirm that terminal modifications are meant to be excluded here
+            for (int fragmentNumber = 1; fragmentNumber < oligoLength; fragmentNumber++)
+            {
+                // zero-based index of the nucleotide that this fragment number adds, counted from the relevant terminus
+                int naIndex = isThreePrimeTerminal ? Length - fragmentNumber : fragmentNumber - 1;
+                Nucleotide addedNucleotide = sequence[naIndex];
+                monoMass += addedNucleotide.MonoisotopicMass;
+
+                // add side-chain mod (the mod dictionary is offset by two from the zero-based residue index)
+                if (AllModsOneIsNterminus.TryGetValue(naIndex + 2, out Modification mod))
+                {
+                    monoMass += mod.MonoisotopicMass ?? 0;
+                }
+
+                // a base-loss ion lacks the base of the most recently added nucleotide; the loss is not carried forward
+                double neutralLoss = isBaseLoss ? addedNucleotide.BaseChemicalFormula.MonoisotopicMass : 0;
+
+                yield return new Product(type, fragmentTerminus, monoMass - neutralLoss, fragmentNumber,
+                    isThreePrimeTerminal ? oligoLength - fragmentNumber : fragmentNumber, 0, null, 0);
+            }
+        }
+
+        /// <summary>
+        /// Returns a copy of this oligo in which the position keyed j + 2 carries an "Anywhere." modification whose
+        /// mass is massToLocalize plus the mass of any modification already present at that position.
+        /// </summary>
+        public IBioPolymerWithSetMods Localize(int j, double massToLocalize)
+        {
+            int modKey = j + 2;
+            var localizedMods = new Dictionary<int, Modification>(AllModsOneIsNterminus);
+
+            // An existing mod at the site is replaced by one mod carrying the combined mass.
+            double combinedMass = massToLocalize
+                + (localizedMods.TryGetValue(modKey, out Modification existing) ? (double)existing.MonoisotopicMass : 0);
+            localizedMods[modKey] = new Modification(_locationRestriction: "Anywhere.", _monoisotopicMass: combinedMass);
+
+            return new OligoWithSetMods(NucleicAcid, _digestionParams, OneBasedStartResidue, OneBasedEndResidue, MissedCleavages,
+                CleavageSpecificityForFdrCategory, localizedMods, NumFixedMods, FivePrimeTerminus, ThreePrimeTerminus);
+        }
+
+        // Rebuilds _allModsOneIsNterminus from the bracketed "type:id" annotations in FullSequence, resolving each id through idToMod.
+        // Keys: a mod written after the k-th unbracketed character gets key k + 1; a terminal mod closing the string gets BaseSequence.Length + 2.
+        private void GetModsAfterDeserialization(Dictionary<string, Modification> idToMod)
+        {
+            _allModsOneIsNterminus = new Dictionary<int, Modification>();
+            int currentModStart = 0;
+            int currentModificationLocation = 1;
+            bool currentlyReadingMod = false;
+            int bracketCount = 0;
+
+            for (int r = 0; r < FullSequence.Length; r++)
+            {
+                char c = FullSequence[r];
+                if (c == '[')
+                {
+                    currentlyReadingMod = true;
+                    if (bracketCount == 0)
+                    {
+                        currentModStart = r + 1;
+                    }
+
+                    bracketCount++;
+                }
+                else if (c == ']')
+                {
+                    string modId = null;
+                    bracketCount--;
+                    if (bracketCount == 0)
+                    {
+                        try
+                        {
+                            //remove the beginning section (e.g. "Fixed", "Variable", "Uniprot")
+                            string modString = FullSequence.Substring(currentModStart, r - currentModStart);
+                            int splitIndex = modString.IndexOf(':');
+                            string modType = modString.Substring(0, splitIndex); // also rejects annotations without ':' (Substring throws when splitIndex is -1)
+                            modId = modString.Substring(splitIndex + 1, modString.Length - splitIndex - 1);
+                        }
+                        catch (Exception e)
+                        {
+                            throw new MzLibUtil.MzLibException("Error while trying to parse string into peptide: " + e.Message);
+                        }
+
+                        if (!idToMod.TryGetValue(modId, out Modification mod))
+                        {
+                            throw new MzLibUtil.MzLibException("Could not find modification while reading string: " + FullSequence);
+                        }
+                        // Oligo mods spell their end-terminal restriction "3'-terminal." / "Oligo 3'-terminal."; the peptide spelling is still accepted.
+                        if ((mod.LocationRestriction.Contains("C-terminal.") || mod.LocationRestriction.Contains("3'-terminal.")) && r == FullSequence.Length - 1)
+                        {
+                            currentModificationLocation = BaseSequence.Length + 2;
+                        }
+
+                        _allModsOneIsNterminus.Add(currentModificationLocation, mod);
+                        currentlyReadingMod = false;
+                    }
+                }
+                else if (!currentlyReadingMod)
+                {
+                    currentModificationLocation++;
+                }
+                //else do nothing
+            }
+        }
+    }
+}
diff --git a/mzLib/Transcriptomics/Digestion/RnaDigestionParams.cs b/mzLib/Transcriptomics/Digestion/RnaDigestionParams.cs
new file mode 100644
index 000000000..379e48fa9
--- /dev/null
+++ b/mzLib/Transcriptomics/Digestion/RnaDigestionParams.cs
@@ -0,0 +1,51 @@
+﻿using Omics.Digestion;
+using Omics.Fragmentation;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace Transcriptomics.Digestion
+{
+    public class RnaDigestionParams : IDigestionParams
+    {
+        // this parameterless constructor needs to exist to read the toml.
+        // if you can figure out a way to get rid of it, feel free...
+        public RnaDigestionParams() : this("top-down")
+        {
+        }
+
+        public RnaDigestionParams(string rnase = "top-down", int maxMissedCleavages = 0, int minLength = 3,
+            int maxLength = int.MaxValue, int maxModificationIsoforms = 1024, int maxMods = 2,
+            FragmentationTerminus fragmentationTerminus = FragmentationTerminus.Both)
+        {
+            Rnase = RnaseDictionary.Dictionary[rnase]; // rnase is the lookup key into RnaseDictionary.Dictionary
+            MaxMissedCleavages = maxMissedCleavages;
+            MinLength = minLength;
+            MaxLength = maxLength;
+            MaxMods = maxMods;
+            MaxModificationIsoforms = maxModificationIsoforms;
+            FragmentationTerminus = fragmentationTerminus;
+        }
+
+        public int MaxMissedCleavages { get; set; }
+        public int MinLength { get; set; }
+        public int MaxLength { get; set; }
+        public int MaxModificationIsoforms { get; set; }
+        public int MaxMods { get; set; }
+        public DigestionAgent DigestionAgent => Rnase;
+        public Rnase Rnase { get; private set; }
+        public FragmentationTerminus FragmentationTerminus { get; set; }
+        public CleavageSpecificity SearchModeType { get; set; } = CleavageSpecificity.Full;
+
+        /// <summary>Copies every setting, including SearchModeType; newTerminus, when supplied, replaces FragmentationTerminus.</summary>
+        public IDigestionParams Clone(FragmentationTerminus? newTerminus = null)
+        {
+            // SearchModeType is not a constructor argument, so it is carried over through the object initializer
+            return new RnaDigestionParams(Rnase.Name, MaxMissedCleavages, MinLength, MaxLength,
+                MaxModificationIsoforms, MaxMods, newTerminus ?? FragmentationTerminus)
+            { SearchModeType = SearchModeType };
+        }
+    }
+}
diff --git a/mzLib/Transcriptomics/Digestion/Rnase.cs b/mzLib/Transcriptomics/Digestion/Rnase.cs
index 646bbc8d1..3670f1b3c 100644
--- a/mzLib/Transcriptomics/Digestion/Rnase.cs
+++ b/mzLib/Transcriptomics/Digestion/Rnase.cs
@@ -1,4 +1,5 @@
-﻿using Omics.Digestion;
+﻿using Chemistry;
+using Omics.Digestion;
 using Omics.Modifications;
 
 namespace Transcriptomics.Digestion
@@ -13,10 +14,59 @@ public Rnase(string name, CleavageSpecificity cleaveSpecificity, List<DigestionM
             DigestionMotifs = motifList;
         }
 
-        // TODO: Coming soon to a mzLib near you
-        // public List<NucleolyticOligo> GetUnmodifiedOligos(NucleicAcid nucleicAcid, int maxMissedCleavages, int minLength, int maxLength)
-        // private IEnumerable<NucleolyticOligo> FullDigestion(NucleicAcid nucleicAcid, int maxMissedCleavages, int minLength, int maxLength)
-        
+        /// <summary>
+        /// Digests the nucleic acid into unmodified oligos whose lengths pass ValidLength for the given bounds.
+        /// </summary>
+        public List<NucleolyticOligo> GetUnmodifiedOligos(NucleicAcid nucleicAcid, int maxMissedCleavages, int minLength,
+            int maxLength)
+        {
+            switch (CleavageSpecificity)
+            {
+                // top down: the intact nucleic acid is the only candidate
+                case CleavageSpecificity.None:
+                    var intact = new List<NucleolyticOligo>();
+                    if (ValidLength(nucleicAcid.Length, minLength, maxLength))
+                    {
+                        intact.Add(new NucleolyticOligo(nucleicAcid, 1, nucleicAcid.Length, 0,
+                            CleavageSpecificity.Full, nucleicAcid.FivePrimeTerminus, nucleicAcid.ThreePrimeTerminus));
+                    }
+                    return intact;
+                // full cleavage
+                case CleavageSpecificity.Full:
+                    return new List<NucleolyticOligo>(FullDigestion(nucleicAcid, maxMissedCleavages, minLength, maxLength));
+                default:
+                    throw new ArgumentException(
+                        "Cleave Specificity not defined for Rna digestion, currently supports Full and None");
+            }
+        }
+
+        // Lazily yields every oligo bounded by two cleavage sites, for 0 up to maxMissedCleavages skipped sites between them.
+        private IEnumerable<NucleolyticOligo> FullDigestion(NucleicAcid nucleicAcid, int maxMissedCleavages,
+            int minLength, int maxLength)
+        {
+            List<int> cleavageSites = GetDigestionSiteIndices(nucleicAcid.BaseSequence);
+            for (int missedCleavages = 0; missedCleavages <= maxMissedCleavages; missedCleavages++)
+            {
+                int span = missedCleavages + 1;
+                for (int left = 0; left + span < cleavageSites.Count; left++)
+                {
+                    int siteBefore = cleavageSites[left];
+                    int oneBasedEndResidue = cleavageSites[left + span];
+                    if (!ValidLength(oneBasedEndResidue - siteBefore, minLength, maxLength))
+                        continue;
+                    int oneBasedStartResidue = siteBefore + 1;
+                    // contains original 5' terminus ? keep it : set to OH
+                    IHasChemicalFormula fivePrimeTerminus = oneBasedStartResidue == 1
+                        ? nucleicAcid.FivePrimeTerminus : ChemicalFormula.ParseFormula("O-3P-1");
+                    // contains original 3' terminus ? keep it : set to phosphate
+                    IHasChemicalFormula threePrimeTerminus = oneBasedEndResidue == nucleicAcid.Length
+                        ? nucleicAcid.ThreePrimeTerminus : ChemicalFormula.ParseFormula("H2O4P");
+                    yield return new NucleolyticOligo(nucleicAcid, oneBasedStartResidue, oneBasedEndResidue,
+                        missedCleavages, CleavageSpecificity.Full, fivePrimeTerminus, threePrimeTerminus);
+                }
+            }
+        }
+
         public bool Equals(Rnase? other)
         {
             if (ReferenceEquals(null, other)) return false;
diff --git a/mzLib/Transcriptomics/Interfaces/INucleicAcid.cs b/mzLib/Transcriptomics/Interfaces/INucleicAcid.cs
index 3d55d2ef4..d2052aee3 100644
--- a/mzLib/Transcriptomics/Interfaces/INucleicAcid.cs
+++ b/mzLib/Transcriptomics/Interfaces/INucleicAcid.cs
@@ -4,7 +4,7 @@
 
 namespace Transcriptomics
 {
-    public interface INucleicAcid : IHasChemicalFormula, IBioPolymer
+    public interface INucleicAcid : IHasChemicalFormula
     {
         /// <summary>
         /// The amino acid sequence
diff --git a/mzLib/Transcriptomics/NucleicAcid.cs b/mzLib/Transcriptomics/NucleicAcid.cs
new file mode 100644
index 000000000..ef6b74cf9
--- /dev/null
+++ b/mzLib/Transcriptomics/NucleicAcid.cs
@@ -0,0 +1,356 @@
+﻿using Chemistry;
+using Omics.Digestion;
+using Omics.Modifications;
+using Omics;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+using Transcriptomics.Digestion;
+
+namespace Transcriptomics
+{
+    /// <summary>
+    /// A linear polymer of Nucleic acids
+    /// </summary>
+    public abstract class NucleicAcid : INucleicAcid, IBioPolymer, IEquatable<NucleicAcid>
+    {
+
+        #region Static Properties
+
+        /// <summary>
+        /// The default chemical formula of the five prime terminus (hydroxyl group)
+        /// </summary>
+        /// <remarks>
+        /// The formula is a net loss (O-3P-1): it removes the excess components of the first nucleotide's
+        /// phospho group, leaving only the hydroxyl. This formula will be used for the five prime terminus, unless
+        /// the nucleic acid is constructed with a different chemical formula
+        /// </remarks>
+        public static readonly ChemicalFormula DefaultFivePrimeTerminus = ChemicalFormula.ParseFormula("O-3P-1");
+
+        /// <summary>
+        /// The default chemical formula of the three prime terminus (hydroxyl group)
+        /// </summary>
+        /// <remarks>
+        /// This is used to account for the mass of the additional hydroxyl group at the three prime end of most oligonucleotides.
+        /// This formula will be used for the three prime terminus, unless the nucleic acid is constructed with a different
+        /// chemical formula
+        /// </remarks>
+        public static readonly ChemicalFormula DefaultThreePrimeTerminus = ChemicalFormula.ParseFormula("OH");
+
+        #endregion
+
+        #region Constuctors
+
+        // Builds the polymer from a one-letter sequence; null termini fall back to the class defaults.
+        protected NucleicAcid(string sequence, IHasChemicalFormula? fivePrimeTerm = null, IHasChemicalFormula? threePrimeTerm = null,
+            IDictionary<int, List<Modification>>? oneBasedPossibleLocalizedModifications = null)
+        {
+            // Start at zero: the terminus setters and ParseSequence each add their share of the mass.
+            MonoisotopicMass = 0;
+            Length = sequence.Length;
+            _nucleicAcids = new Nucleotide[Length];
+            ThreePrimeTerminus = threePrimeTerm ?? DefaultThreePrimeTerminus;
+            FivePrimeTerminus = fivePrimeTerm ?? DefaultFivePrimeTerminus;
+            _oneBasedPossibleLocalizedModifications = oneBasedPossibleLocalizedModifications ?? new Dictionary<int, List<Modification>>();
+            GeneNames = new List<Tuple<string, string>>();
+            ParseSequence(sequence);
+        }
+
+        // Database-entry constructor: builds the polymer via the sequence constructor, then records metadata.
+        protected NucleicAcid(string sequence, string name, string identifier, string organism, string databaseFilePath,
+            IHasChemicalFormula? fivePrimeTerm = null, IHasChemicalFormula? threePrimeTerm = null,
+            IDictionary<int, List<Modification>>? oneBasedPossibleLocalizedModifications = null,
+            bool isContaminant = false, bool isDecoy = false,
+            Dictionary<string, string>? additionalDatabaseFields = null)
+            : this(sequence, fivePrimeTerm, threePrimeTerm, oneBasedPossibleLocalizedModifications)
+        {
+            // Identity of the database entry; 'identifier' is exposed as Accession.
+            (Name, Accession, Organism) = (name, identifier, organism);
+            DatabaseFilePath = databaseFilePath;
+            // Flags and free-form extras are stored unchanged.
+            (IsContaminant, IsDecoy) = (isContaminant, isDecoy);
+            AdditionalDatabaseFields = additionalDatabaseFields;
+        }
+
+        #endregion
+
+        #region Private Properties
+
+        /// <summary>
+        /// Backing field for FivePrimeTerminus; replaced through ReplaceTerminus so the mass stays in step
+        /// </summary>
+        private IHasChemicalFormula _5PrimeTerminus;
+
+        /// <summary>
+        /// Backing field for ThreePrimeTerminus; replaced through ReplaceTerminus so the mass stays in step
+        /// </summary>
+        private IHasChemicalFormula _3PrimeTerminus;
+
+        /// <summary>
+        /// All of the nucleic acid residues indexed by position from 5- to 3-prime.
+        /// </summary>
+        private Nucleotide[] _nucleicAcids;
+
+        /// <summary>
+        /// The cached sequence string. Written by ParseSequence and rebuilt by BaseSequence when null or empty
+        /// </summary>
+        private string _sequence;
+
+        private IDictionary<int, List<Modification>> _oneBasedPossibleLocalizedModifications; // never null after construction
+
+        #endregion
+
+
+        #region Public Properties
+
+        /// <summary>
+        /// Gets or sets the 5' terminus of this nucleic acid polymer; setting it adjusts MonoisotopicMass to match
+        /// </summary>
+        public IHasChemicalFormula FivePrimeTerminus
+        {
+            get => _5PrimeTerminus;
+            set => ReplaceTerminus(ref _5PrimeTerminus, value);
+        }
+
+        /// <summary>
+        /// Gets or sets the 3' terminus of this nucleic acid polymer; setting it adjusts MonoisotopicMass to match
+        /// </summary>
+        public IHasChemicalFormula ThreePrimeTerminus
+        {
+            get => _3PrimeTerminus;
+            set => ReplaceTerminus(ref _3PrimeTerminus, value);
+        }
+
+        /// <summary>
+        /// Gets the number of nucleotides in this polymer (set by ParseSequence, which skips spaces and '*')
+        /// </summary>
+        public int Length { get; private set; }
+
+        // The database metadata below is assigned only by the database constructor; the sequence-only constructor leaves it at defaults
+        // TODO: These interface members
+        public string Name { get; }
+        public string FullName => Name; // TODO: Consider if this needs to be different from the name
+        public string DatabaseFilePath { get; }
+        public bool IsDecoy { get; }
+        public bool IsContaminant { get; }
+        public string Accession { get; } // the constructor's 'identifier' argument
+
+        public IDictionary<int, List<Modification>> OneBasedPossibleLocalizedModifications => _oneBasedPossibleLocalizedModifications;
+        public string Organism { get; }
+
+        /// <summary>
+        /// The list of gene names consists of tuples, where Item1 is the type of gene name, and Item2 is the name. There may be many genes and names of a certain type. The sequence constructor initializes this to an empty list.
+        /// </summary>
+        public IEnumerable<Tuple<string, string>> GeneNames { get; }
+        public Dictionary<string, string>? AdditionalDatabaseFields { get; }
+
+        /// <summary>
+        /// The monoisotopic mass of this nucleic acid: the sum of its residue masses and both termini
+        /// </summary>
+        public double MonoisotopicMass { get; private set; }
+
+        /// <summary>
+        /// Gets the backing residue array itself (not a copy, so callers should not mutate it), used for -base mass calculations.
+        /// </summary>
+        public Nucleotide[] NucleicAcidArray => _nucleicAcids;
+
+        public ChemicalFormula ThisChemicalFormula => GetChemicalFormula(); // rebuilt on every access
+
+        #endregion
+
+        #region Nucleic Acid Sequence
+
+        /// <summary>
+        /// The one-letter base sequence, rebuilt from the residue array when no cached string exists
+        /// </summary>
+        public string BaseSequence
+        {
+            get
+            {
+                // Cached copy present: hand it back untouched
+                if (!string.IsNullOrEmpty(_sequence))
+                    return _sequence;
+                // Otherwise rebuild from the residues' letters and cache the result
+                var letters = _nucleicAcids.Select(na => na.Letter).ToArray();
+                _sequence = new string(letters);
+                return _sequence;
+            }
+        }
+
+        public char this[int zeroBasedIndex] => BaseSequence[zeroBasedIndex]; // character of BaseSequence at the given zero-based position
+
+        #endregion
+
+        #region Digestion
+
+        // Digests with the configured Rnase, then applies fixed/variable mods. Iterator: nothing (including the type check) runs until enumerated.
+        public IEnumerable<IBioPolymerWithSetMods> Digest(IDigestionParams digestionParameters, List<Modification> allKnownFixedMods,
+            List<Modification> variableModifications, List<SilacLabel> silacLabels = null, (SilacLabel startLabel, SilacLabel endLabel)? turnoverLabels = null,
+            bool topDownTruncationSearch = false)
+        {
+            // silacLabels, turnoverLabels and topDownTruncationSearch are not used by this implementation
+            if (digestionParameters is not RnaDigestionParams digestionParams)
+                throw new ArgumentException("DigestionParameters must be of type RnaDigestionParams for RNA digestion");
+            allKnownFixedMods ??= new();
+            variableModifications ??= new();
+            // digest based upon base sequence
+            foreach (var unmodifiedOligo in digestionParams.Rnase.GetUnmodifiedOligos(this,
+                         digestionParams.MaxMissedCleavages, digestionParams.MinLength, digestionParams.MaxLength))
+            {
+                // add fixed and variable mods to base sequence digestion products
+                foreach (var modifiedOligo in unmodifiedOligo.GetModifiedOligos(allKnownFixedMods, digestionParams,
+                             variableModifications))
+                {
+                    yield return modifiedOligo;
+                }
+            }
+        }
+
+        // Typed convenience overload: same digestion as the IDigestionParams overload, with each result cast to OligoWithSetMods.
+        public IEnumerable<OligoWithSetMods> Digest(RnaDigestionParams digestionParameters, List<Modification> allKnownFixedMods,
+            List<Modification> variableModifications, List<SilacLabel> silacLabels = null,
+            (SilacLabel startLabel, SilacLabel endLabel)? turnoverLabels = null,
+            bool topDownTruncationSearch = false)
+        {
+            var asInterface = (IDigestionParams)digestionParameters; // selects the interface overload instead of recursing into this one
+            return Digest(asInterface, allKnownFixedMods, variableModifications, silacLabels, turnoverLabels, topDownTruncationSearch).Cast<OligoWithSetMods>();
+        }
+
+        #endregion
+
+        #region Electrospray
+
+        // Yields the m/z of this polymer at every integer charge from minCharge through maxCharge, both inclusive.
+        public IEnumerable<double> GetElectrospraySeries(int minCharge, int maxCharge)
+        {
+            // '<=' so that maxCharge itself is reported; the series is empty when minCharge > maxCharge
+            for (int charge = minCharge; charge <= maxCharge; charge++)
+                yield return this.ToMz(charge);
+        }
+
+        #endregion
+
+        #region Chemical Formula
+
+        public ChemicalFormula GetChemicalFormula()
+        {
+            // Accumulate into a fresh formula on every call
+            var total = new ChemicalFormula();
+
+            // Both termini first: 5' then 3'
+            total.Add(FivePrimeTerminus.ThisChemicalFormula);
+            total.Add(ThreePrimeTerminus.ThisChemicalFormula);
+
+            // Then every residue, walking the array from the 5' end
+            foreach (var residue in _nucleicAcids)
+            {
+                total.Add(residue.ThisChemicalFormula);
+            }
+
+            // Sum of termini and residues
+            return total;
+        }
+
+        #endregion
+
+        #region Private Methods
+
+        // Swaps a terminus in place and keeps MonoisotopicMass in step; returns false when the value is unchanged.
+        bool ReplaceTerminus(ref IHasChemicalFormula terminus, IHasChemicalFormula value)
+        {
+            if (Equals(value, terminus))
+                return false;
+            // Remove the outgoing terminus' mass before it is overwritten (a null terminus contributes nothing)
+            if (terminus != null)
+                MonoisotopicMass -= terminus.MonoisotopicMass;
+            terminus = value;
+            // Fold the incoming terminus' mass into the running total
+            if (value != null)
+                MonoisotopicMass += value.MonoisotopicMass;
+
+            return true;
+        }
+
+        /// <summary>
+        /// Parses a string of nucleotide letters into this polymer's residues, cached sequence, length and mass
+        /// </summary>
+        /// <param name="sequence">One-letter nucleotide codes; spaces and '*' are skipped</param>
+        /// <returns>False if the sequence is null or empty (nothing is parsed), otherwise true</returns>
+        private bool ParseSequence(string sequence)
+        {
+            if (string.IsNullOrEmpty(sequence))
+                return false;
+
+            int index = 0;
+
+            double monoMass = 0;
+
+            // Rebuilt from accepted residues only, so skipped characters never reach the cached sequence
+            StringBuilder sb = new StringBuilder(sequence.Length);
+
+            // A letter that is neither a known residue nor a skipped character throws ArgumentException
+            foreach (char letter in sequence)
+            {
+                // Known residue: store it, echo its letter and accumulate its mass
+                if (Nucleotide.TryGetResidue(letter, out Nucleotide residue))
+                {
+                    _nucleicAcids[index++] = residue;
+                    sb.Append(residue.Letter);
+                    monoMass += residue.MonoisotopicMass;
+                }
+                else
+                {
+                    switch (letter)
+                    {
+                        case ' ': // ignore spaces
+                            break;
+
+                        case '*': // ignore *
+                            break;
+
+                        default:
+                            throw new ArgumentException(string.Format(
+                                "Nucleic Acid Letter {0} does not exist in the Nucleic Acid Dictionary. {0} is also not a valid character",
+                                letter));
+                    }
+                }
+            }
+
+            _sequence = sb.ToString();
+            Length = index;
+            MonoisotopicMass += monoMass;
+            Array.Resize(ref _nucleicAcids, Length); // drop the slots that were reserved for skipped characters
+
+            return true;
+        }
+
+        #endregion
+
+        #region Interface Implemntations and Overrides
+
+        // Value equality: same base sequence and same termini, i.e. the fields GetHashCode combines. Null termini are tolerated.
+        public bool Equals(NucleicAcid? other)
+        {
+            if (other is null) return false;
+            return ReferenceEquals(this, other) || (_sequence == other._sequence
+                   && Equals(_5PrimeTerminus, other._5PrimeTerminus) && Equals(_3PrimeTerminus, other._3PrimeTerminus));
+        }
+
+        public override bool Equals(object? obj)
+        {
+            // Identity short-circuits; null or a different runtime type can never be equal
+            if (ReferenceEquals(this, obj)) return true;
+            if (obj is null || obj.GetType() != GetType()) return false;
+            // Same concrete type: defer to the typed comparison
+            return Equals((NucleicAcid)obj);
+        }
+
+        // Hash over both termini and the cached sequence string.
+        public override int GetHashCode() => HashCode.Combine(
+            _5PrimeTerminus, _3PrimeTerminus,
+            _sequence);
+
+        #endregion
+    }
+}
diff --git a/mzLib/Transcriptomics/RNA.cs b/mzLib/Transcriptomics/RNA.cs
new file mode 100644
index 000000000..3e72c1f14
--- /dev/null
+++ b/mzLib/Transcriptomics/RNA.cs
@@ -0,0 +1,51 @@
+﻿using Chemistry;
+using Omics.Modifications;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace Transcriptomics
+{
+    public class RNA : NucleicAcid // concrete nucleic acid; declares no members beyond its two constructors
+    {
+        /// <summary>
+        /// For constructing RNA from a string
+        /// </summary>
+        /// <param name="sequence">One-letter nucleotide sequence, parsed by the NucleicAcid constructor</param>
+        /// <param name="fivePrimeTerm">5' terminus; null selects NucleicAcid.DefaultFivePrimeTerminus</param>
+        /// <param name="threePrimeTerm">3' terminus; null selects NucleicAcid.DefaultThreePrimeTerminus</param>
+        /// <param name="oneBasedPossibleLocalizedModifications">Possible modifications keyed by one-based position; null becomes an empty dictionary</param>
+        public RNA(string sequence, IHasChemicalFormula? fivePrimeTerm = null, IHasChemicalFormula? threePrimeTerm = null,
+            IDictionary<int, List<Modification>>? oneBasedPossibleLocalizedModifications = null)
+            : base(sequence, fivePrimeTerm, threePrimeTerm, oneBasedPossibleLocalizedModifications)
+        {
+        }
+
+        /// <summary>
+        /// For use with RNA loaded from a database
+        /// </summary>
+        /// <param name="sequence">One-letter nucleotide sequence, parsed by the NucleicAcid constructor</param>
+        /// <param name="name">Stored as Name (and returned by FullName)</param>
+        /// <param name="identifier">Stored as Accession</param>
+        /// <param name="organism">Stored as Organism</param>
+        /// <param name="databaseFilePath">Stored as DatabaseFilePath</param>
+        /// <param name="fivePrimeTerminus">5' terminus; null selects NucleicAcid.DefaultFivePrimeTerminus</param>
+        /// <param name="threePrimeTerminus">3' terminus; null selects NucleicAcid.DefaultThreePrimeTerminus</param>
+        /// <param name="oneBasedPossibleModifications">Possible modifications keyed by one-based position; null becomes an empty dictionary</param>
+        /// <param name="isContaminant">Stored as IsContaminant</param>
+        /// <param name="isDecoy">Stored as IsDecoy</param>
+        /// <param name="databaseAdditionalFields">Stored as AdditionalDatabaseFields; may be null</param>
+        public RNA(string sequence, string name, string identifier, string organism, string databaseFilePath,
+            IHasChemicalFormula? fivePrimeTerminus = null, IHasChemicalFormula? threePrimeTerminus = null,
+            IDictionary<int, List<Modification>>? oneBasedPossibleModifications = null,
+            bool isContaminant = false, bool isDecoy = false,
+            Dictionary<string, string>? databaseAdditionalFields = null)
+            : base(sequence, name, identifier, organism, databaseFilePath, fivePrimeTerminus, threePrimeTerminus,
+                oneBasedPossibleModifications, isContaminant, isDecoy, databaseAdditionalFields)
+        {
+
+        }
+    }
+}
diff --git a/mzLib/mzLib.sln.DotSettings b/mzLib/mzLib.sln.DotSettings
index 78477fa52..06594535d 100644
--- a/mzLib/mzLib.sln.DotSettings
+++ b/mzLib/mzLib.sln.DotSettings
@@ -1,11 +1,14 @@
- <wpf:ResourceDictionary xml:space="preserve" xmlns:x="http://schemas.microsoft.com/winfx/2006/xaml" xmlns:s="clr-namespace:System;assembly=mscorlib" xmlns:ss="urn:shemas-jetbrains-com:settings-storage-xaml" xmlns:wpf="http://schemas.microsoft.com/winfx/2006/xaml/presentation">
+﻿ <wpf:ResourceDictionary xml:space="preserve" xmlns:x="http://schemas.microsoft.com/winfx/2006/xaml" xmlns:s="clr-namespace:System;assembly=mscorlib" xmlns:ss="urn:shemas-jetbrains-com:settings-storage-xaml" xmlns:wpf="http://schemas.microsoft.com/winfx/2006/xaml/presentation">
 	<s:Boolean x:Key="/Default/UserDictionary/Words/=decon/@EntryIndexedValue">True</s:Boolean>
 	<s:Boolean x:Key="/Default/UserDictionary/Words/=Deconv/@EntryIndexedValue">True</s:Boolean>
 	<s:Boolean x:Key="/Default/UserDictionary/Words/=Deconvolute/@EntryIndexedValue">True</s:Boolean>
 	<s:Boolean x:Key="/Default/UserDictionary/Words/=deconvoluted/@EntryIndexedValue">True</s:Boolean>
 	<s:Boolean x:Key="/Default/UserDictionary/Words/=Deconvoluter/@EntryIndexedValue">True</s:Boolean>
+	<s:Boolean x:Key="/Default/UserDictionary/Words/=Nucleolytic/@EntryIndexedValue">True</s:Boolean>
+	<s:Boolean x:Key="/Default/UserDictionary/Words/=Oligo/@EntryIndexedValue">True</s:Boolean>
 	<s:Boolean x:Key="/Default/UserDictionary/Words/=Prsm/@EntryIndexedValue">True</s:Boolean>
 	<s:Boolean x:Key="/Default/UserDictionary/Words/=Toppic/@EntryIndexedValue">True</s:Boolean>
 	<s:Boolean x:Key="/Default/UserDictionary/Words/=Monoisotopic/@EntryIndexedValue">True</s:Boolean>
+	<s:Boolean x:Key="/Default/UserDictionary/Words/=Transcriptomics/@EntryIndexedValue">True</s:Boolean>
 	<s:Boolean x:Key="/Default/UserDictionary/Words/=Winsorize/@EntryIndexedValue">True</s:Boolean>
 	<s:Boolean x:Key="/Default/UserDictionary/Words/=Winsorized/@EntryIndexedValue">True</s:Boolean></wpf:ResourceDictionary>
\ No newline at end of file

From de33dfa648cae8b09e1efb309d12161b59f54186 Mon Sep 17 00:00:00 2001
From: nbollis <nbollis@comcast.net>
Date: Thu, 19 Sep 2024 17:30:43 -0500
Subject: [PATCH 02/17] Implemented all tests

---
 .../Oligo/DissociationTypeCollection.cs       |   4 +-
 mzLib/Test/Transcriptomics/TestDigestion.cs   | 780 ++++++++++++++++++
 .../Test/Transcriptomics/TestFragmentation.cs | 239 ++++++
 mzLib/Test/Transcriptomics/TestNucleicAcid.cs | 171 ++++
 mzLib/Test/Transcriptomics/TestProductType.cs | 280 +++++++
 mzLib/Test/Transcriptomics/TestRnase.cs       |   3 +-
 mzLib/Transcriptomics/NucleicAcid.cs          |  33 +-
 7 files changed, 1498 insertions(+), 12 deletions(-)
 create mode 100644 mzLib/Test/Transcriptomics/TestDigestion.cs
 create mode 100644 mzLib/Test/Transcriptomics/TestFragmentation.cs
 create mode 100644 mzLib/Test/Transcriptomics/TestNucleicAcid.cs
 create mode 100644 mzLib/Test/Transcriptomics/TestProductType.cs

diff --git a/mzLib/Omics/Fragmentation/Oligo/DissociationTypeCollection.cs b/mzLib/Omics/Fragmentation/Oligo/DissociationTypeCollection.cs
index b2b7cd891..3bc08d089 100644
--- a/mzLib/Omics/Fragmentation/Oligo/DissociationTypeCollection.cs
+++ b/mzLib/Omics/Fragmentation/Oligo/DissociationTypeCollection.cs
@@ -1,2 +1,4 @@
 ﻿using Chemistry;using MassSpectrometry;namespace Omics.Fragmentation.Oligo{    /// <summary>    /// Methods dealing with specific product type for RNA molecules    /// </summary>    public static class DissociationTypeCollection    {
-        /// <summary>        /// Product Ion types by dissociation method        /// </summary>        /// <remarks>        /// HCD ions were taken from the following paper: https://www.nature.com/articles/s41598-023-36193-2        /// Ion types below here should be validated with experimental results.        /// Base and water losses occur very frequently and may also be present in these activation types.        /// CID, UVPD, and aEPD ions were taken from the following paper: https://pubs.acs.org/doi/10.1021/acs.analchem.3c05428?ref=PDF        /// NETD ions were taken from the following paper: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7161943/        /// lowCID ions were taken from this Thermo Poster: https://assets.thermofisher.com/TFS-Assets/CMD/Flyers/fl-489263-asms23-optimized-fragmentation-oligonucleotides-suppresses-undesired-fragmentation-fl489263-en.pdf        /// </remarks>        public static Dictionary<DissociationType, List<ProductType>> ProductsFromDissociationType =            new Dictionary<DissociationType, List<ProductType>>()            {                { DissociationType.Unknown, new List<ProductType>() },                { DissociationType.Custom, new List<ProductType>() },                {                    DissociationType.AnyActivationType, new List<ProductType>                    {                        ProductType.a, ProductType.aBaseLoss, ProductType.aWaterLoss,                        ProductType.b, ProductType.bBaseLoss, ProductType.bWaterLoss,                        ProductType.c, ProductType.cBaseLoss, ProductType.cWaterLoss,                        ProductType.d, ProductType.dBaseLoss, ProductType.dWaterLoss,                        ProductType.w, ProductType.wBaseLoss, ProductType.wWaterLoss,                        ProductType.x, ProductType.xBaseLoss, ProductType.xWaterLoss,                        ProductType.y, ProductType.yBaseLoss, ProductType.yWaterLoss,                        ProductType.z, 
ProductType.zBaseLoss, ProductType.zWaterLoss,                        ProductType.M                    }                },                {                    DissociationType.CID, new List<ProductType>                    {                        ProductType.a, ProductType.aBaseLoss, ProductType.c, ProductType.dWaterLoss, ProductType.w,                        ProductType.y, ProductType.yWaterLoss, ProductType.M                    }                },                {                    DissociationType.HCD, new List<ProductType>                    {                        ProductType.a, ProductType.aBaseLoss, ProductType.b, ProductType.c, ProductType.d,                        ProductType.dWaterLoss, ProductType.w, ProductType.x, ProductType.y, ProductType.z,                        ProductType.M                    }                },                {                    DissociationType.UVPD, new List<ProductType>                    {                        ProductType.a, ProductType.c, ProductType.d, ProductType.w, ProductType.M                    }                },                {                    DissociationType.aEPD, new List<ProductType>                    {                        ProductType.a, ProductType.c, ProductType.d, ProductType.w, ProductType.x, ProductType.z, ProductType.M                    }                },                {                    DissociationType.NETD, new List<ProductType>                    {                        ProductType.w, ProductType.d, ProductType.M                    }                },                {                    DissociationType.LowCID, new List<ProductType>()                    {                        ProductType.aBaseLoss, ProductType.c, ProductType.dWaterLoss, ProductType.w,                        ProductType.y, ProductType.yWaterLoss, ProductType.M                    }                },                { DissociationType.IRMPD, new List<ProductType>() { } },     
           { DissociationType.ECD, new List<ProductType> { } },                { DissociationType.PQD, new List<ProductType> { } },                { DissociationType.ETD, new List<ProductType> { } },                { DissociationType.EThcD, new List<ProductType> { } },            };        /// <summary>        /// Returns list of products types based upon the dissociation type        /// </summary>        /// <param name="dissociationType"></param>        /// <returns></returns>        public static List<ProductType> GetRnaProductTypesFromDissociationType(this DissociationType dissociationType) =>            ProductsFromDissociationType[dissociationType];        /// <summary>        /// Mass to be added or subtracted        /// </summary>        private static readonly Dictionary<ProductType, ChemicalFormula> FragmentIonCaps =            new Dictionary<ProductType, ChemicalFormula>            {                { ProductType.a, ChemicalFormula.ParseFormula("H") },                { ProductType.aWaterLoss, ChemicalFormula.ParseFormula("H-1O-1") },                { ProductType.b, ChemicalFormula.ParseFormula("OH") },                { ProductType.bWaterLoss, ChemicalFormula.ParseFormula("H-1") },                { ProductType.c, ChemicalFormula.ParseFormula("O3H2P") },                { ProductType.cWaterLoss, ChemicalFormula.ParseFormula("O2P") },                { ProductType.d, ChemicalFormula.ParseFormula("O4H2P") },                { ProductType.dWaterLoss, ChemicalFormula.ParseFormula("O3P") },                { ProductType.w, ChemicalFormula.ParseFormula("H") },                { ProductType.wWaterLoss, ChemicalFormula.ParseFormula("H-1O-1") },                { ProductType.x, ChemicalFormula.ParseFormula("O-1H") },                { ProductType.xWaterLoss, ChemicalFormula.ParseFormula("O-2H-1") },                { ProductType.y, ChemicalFormula.ParseFormula("O-3P-1") },                { ProductType.yWaterLoss, 
ChemicalFormula.ParseFormula("O-4H-2P-1") },                { ProductType.z, ChemicalFormula.ParseFormula("O-4P-1") },                { ProductType.zWaterLoss, ChemicalFormula.ParseFormula("O-5H-2P-1") },                //fragment - Base chemical formula is the corresponding fragment chemical formula subtracing 1 H as H is lost when base is removed                { ProductType.aBaseLoss, ChemicalFormula.ParseFormula("H-2") }, // "H-1" -H                 { ProductType.bBaseLoss, ChemicalFormula.ParseFormula("O1H-2") }, //"OH1" -H                { ProductType.cBaseLoss, ChemicalFormula.ParseFormula("O3H-1P") }, //"O3P" -H                { ProductType.dBaseLoss, ChemicalFormula.ParseFormula("O4H-1P") }, //"O4H2P" -H                { ProductType.wBaseLoss, ChemicalFormula.ParseFormula("H-2") }, //"H"-H                { ProductType.xBaseLoss, ChemicalFormula.ParseFormula("O-1H-2") }, //"O-1H" -H                { ProductType.yBaseLoss, ChemicalFormula.ParseFormula("O-3H-2P-1") }, //"O-3P-1" -H                { ProductType.zBaseLoss, ChemicalFormula.ParseFormula("O-4H-3P-1") }, //"O-4H-1P-1" -1                { ProductType.M, new ChemicalFormula() }            };        /// <summary>        /// Returns mass shift by product type        /// </summary>        /// <param name="type"></param>        /// <returns></returns>        public static double GetRnaMassShiftFromProductType(this ProductType type) => FragmentIonCaps[type].MonoisotopicMass;        public static FragmentationTerminus GetRnaTerminusType(this ProductType fragmentType)        {            switch (fragmentType)            {                case ProductType.a:                case ProductType.aWaterLoss:                case ProductType.aBaseLoss:                case ProductType.b:                case ProductType.bWaterLoss:                case ProductType.bBaseLoss:                case ProductType.c:                case ProductType.cWaterLoss:                case 
ProductType.cBaseLoss:                case ProductType.d:                case ProductType.dWaterLoss:                case ProductType.dBaseLoss:                    return FragmentationTerminus.FivePrime;                case ProductType.w:                case ProductType.wWaterLoss:                case ProductType.wBaseLoss:                case ProductType.x:                case ProductType.xWaterLoss:                case ProductType.xBaseLoss:                case ProductType.y:                case ProductType.yWaterLoss:                case ProductType.yBaseLoss:                case ProductType.z:                case ProductType.zWaterLoss:                case ProductType.zBaseLoss:                    return FragmentationTerminus.ThreePrime;                case ProductType.M:                    return FragmentationTerminus.None;                case ProductType.aStar:                case ProductType.aDegree:                case ProductType.bAmmoniaLoss:                case ProductType.yAmmoniaLoss:                case ProductType.zPlusOne:                case ProductType.D:                case ProductType.Ycore:                case ProductType.Y:                default:                    throw new ArgumentOutOfRangeException(nameof(fragmentType), fragmentType, null);            }        }        /// <summary>        /// Product ion types by Fragmentation Terminus        /// </summary>        private static readonly Dictionary<FragmentationTerminus, List<ProductType>>            ProductIonTypesFromSpecifiedTerminus = new Dictionary<FragmentationTerminus, List<ProductType>>            {                {                    FragmentationTerminus.FivePrime, new List<ProductType>                    {                        ProductType.a, ProductType.aWaterLoss, ProductType.aBaseLoss,                        ProductType.b, ProductType.bWaterLoss, ProductType.bBaseLoss,                        ProductType.c, 
ProductType.cWaterLoss, ProductType.cBaseLoss,                        ProductType.d, ProductType.dWaterLoss, ProductType.dBaseLoss,                     }                },                {                    FragmentationTerminus.ThreePrime, new List<ProductType>                    {                        ProductType.w, ProductType.wWaterLoss, ProductType.wBaseLoss,                        ProductType.x, ProductType.xWaterLoss, ProductType.xBaseLoss,                        ProductType.y, ProductType.yWaterLoss, ProductType.yBaseLoss,                        ProductType.z, ProductType.zWaterLoss, ProductType.zBaseLoss,                    }                },                {                    FragmentationTerminus.Both, new List<ProductType>                    {                        ProductType.a, ProductType.aWaterLoss, ProductType.aBaseLoss,                        ProductType.b, ProductType.bWaterLoss, ProductType.bBaseLoss,                        ProductType.c, ProductType.cWaterLoss, ProductType.cBaseLoss,                        ProductType.d, ProductType.dWaterLoss, ProductType.dBaseLoss,                         ProductType.w, ProductType.wWaterLoss, ProductType.wBaseLoss,                        ProductType.x, ProductType.xWaterLoss, ProductType.xBaseLoss,                        ProductType.y, ProductType.yWaterLoss, ProductType.yBaseLoss,                        ProductType.z, ProductType.zWaterLoss, ProductType.zBaseLoss,                        ProductType.M                    }                }            };        public static List<ProductType> GetRnaTerminusSpecificProductTypes(            this FragmentationTerminus fragmentationTerminus)        {            return ProductIonTypesFromSpecifiedTerminus[fragmentationTerminus];        }        /// <summary>        /// Returns all product ion types based upon specified terminus        /// </summary>        /// <param name="dissociationType"></param>        /// <param 
name="fragmentationTerminus"></param>        /// <returns></returns>        public static List<ProductType> GetRnaTerminusSpecificProductTypesFromDissociation(            this DissociationType dissociationType, FragmentationTerminus fragmentationTerminus)        {            var terminusSpecific = fragmentationTerminus.GetRnaTerminusSpecificProductTypes();            var dissociationSpecific = dissociationType.GetRnaProductTypesFromDissociationType();            return terminusSpecific.Intersect(dissociationSpecific).ToList();        }    }}
\ No newline at end of file
+        /// <summary>        /// Product Ion types by dissociation method        /// </summary>        /// <remarks>        /// HCD ions were taken from the following paper: https://www.nature.com/articles/s41598-023-36193-2        /// Ion types below here should be validated with experimental results.        /// Base and water losses occur very frequently and may also be present in these activation types.        /// CID, UVPD, and aEPD ions were taken from the following paper: https://pubs.acs.org/doi/10.1021/acs.analchem.3c05428?ref=PDF        /// NETD ions were taken from the following paper: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7161943/        /// lowCID ions were taken from this Thermo Poster: https://assets.thermofisher.com/TFS-Assets/CMD/Flyers/fl-489263-asms23-optimized-fragmentation-oligonucleotides-suppresses-undesired-fragmentation-fl489263-en.pdf        /// </remarks>        public static Dictionary<DissociationType, List<ProductType>> ProductsFromDissociationType =            new Dictionary<DissociationType, List<ProductType>>()            {                { DissociationType.Unknown, new List<ProductType>() },                { DissociationType.Custom, new List<ProductType>() },                {                    DissociationType.AnyActivationType, new List<ProductType>                    {                        ProductType.a, ProductType.aBaseLoss, ProductType.aWaterLoss,                        ProductType.b, ProductType.bBaseLoss, ProductType.bWaterLoss,                        ProductType.c, ProductType.cBaseLoss, ProductType.cWaterLoss,                        ProductType.d, ProductType.dBaseLoss, ProductType.dWaterLoss,                        ProductType.w, ProductType.wBaseLoss, ProductType.wWaterLoss,                        ProductType.x, ProductType.xBaseLoss, ProductType.xWaterLoss,                        ProductType.y, ProductType.yBaseLoss, ProductType.yWaterLoss,                        ProductType.z, 
ProductType.zBaseLoss, ProductType.zWaterLoss,                        ProductType.M                    }                },                {                    DissociationType.CID, new List<ProductType>                    {                        ProductType.a, ProductType.aBaseLoss, ProductType.c, ProductType.dWaterLoss, ProductType.w,                        ProductType.y, ProductType.yWaterLoss, ProductType.M                    }                },                {                    DissociationType.HCD, new List<ProductType>                    {                        ProductType.a, ProductType.aBaseLoss, ProductType.b, ProductType.c, ProductType.d,                        ProductType.dWaterLoss, ProductType.w, ProductType.x, ProductType.y, ProductType.z,                        ProductType.M                    }                },                {                    DissociationType.UVPD, new List<ProductType>                    {                        ProductType.a, ProductType.c, ProductType.d, ProductType.w, ProductType.M                    }                },                {                    DissociationType.aEPD, new List<ProductType>                    {                        ProductType.a, ProductType.c, ProductType.d, ProductType.w, ProductType.x, ProductType.z, ProductType.M                    }                },                {                    DissociationType.NETD, new List<ProductType>                    {                        ProductType.w, ProductType.d, ProductType.M                    }                },                {                    DissociationType.LowCID, new List<ProductType>()                    {                        ProductType.aBaseLoss, ProductType.c, ProductType.dWaterLoss, ProductType.w,                        ProductType.y, ProductType.yWaterLoss, ProductType.M                    }                },                { DissociationType.IRMPD, new List<ProductType>() { } },     
           { DissociationType.ECD, new List<ProductType> { } },                { DissociationType.PQD, new List<ProductType> { } },                { DissociationType.ETD, new List<ProductType> { } },                { DissociationType.EThcD, new List<ProductType> { } },            };
+
+        /// <summary>        /// Returns all dissociation types with implemented product type collections        /// </summary>        public static IEnumerable<DissociationType> AllImplementedDissociationTypes =>            ProductsFromDissociationType.Where(p => p.Value.Any())                .Select(p => p.Key);        /// <summary>        /// Returns list of products types based upon the dissociation type        /// </summary>        /// <param name="dissociationType"></param>        /// <returns></returns>                                                                                                                                                                                                                     public static List<ProductType> GetRnaProductTypesFromDissociationType(this DissociationType dissociationType) =>            ProductsFromDissociationType[dissociationType];        /// <summary>        /// Mass to be added or subtracted        /// </summary>        private static readonly Dictionary<ProductType, ChemicalFormula> FragmentIonCaps =            new Dictionary<ProductType, ChemicalFormula>            {                { ProductType.a, ChemicalFormula.ParseFormula("H") },                { ProductType.aWaterLoss, ChemicalFormula.ParseFormula("H-1O-1") },                { ProductType.b, ChemicalFormula.ParseFormula("OH") },                { ProductType.bWaterLoss, ChemicalFormula.ParseFormula("H-1") },                { ProductType.c, ChemicalFormula.ParseFormula("O3H2P") },                { ProductType.cWaterLoss, ChemicalFormula.ParseFormula("O2P") },                { ProductType.d, ChemicalFormula.ParseFormula("O4H2P") },                { ProductType.dWaterLoss, ChemicalFormula.ParseFormula("O3P") },                { ProductType.w, ChemicalFormula.ParseFormula("H") },                { ProductType.wWaterLoss, ChemicalFormula.ParseFormula("H-1O-1") },                { ProductType.x, 
ChemicalFormula.ParseFormula("O-1H") },                { ProductType.xWaterLoss, ChemicalFormula.ParseFormula("O-2H-1") },                { ProductType.y, ChemicalFormula.ParseFormula("O-3P-1") },                { ProductType.yWaterLoss, ChemicalFormula.ParseFormula("O-4H-2P-1") },                { ProductType.z, ChemicalFormula.ParseFormula("O-4P-1") },                { ProductType.zWaterLoss, ChemicalFormula.ParseFormula("O-5H-2P-1") },                //fragment - Base chemical formula is the corresponding fragment chemical formula subtracing 1 H as H is lost when base is removed                { ProductType.aBaseLoss, ChemicalFormula.ParseFormula("H-2") }, // "H-1" -H                 { ProductType.bBaseLoss, ChemicalFormula.ParseFormula("O1H-2") }, //"OH1" -H                { ProductType.cBaseLoss, ChemicalFormula.ParseFormula("O3H-1P") }, //"O3P" -H                { ProductType.dBaseLoss, ChemicalFormula.ParseFormula("O4H-1P") }, //"O4H2P" -H                { ProductType.wBaseLoss, ChemicalFormula.ParseFormula("H-2") }, //"H"-H                { ProductType.xBaseLoss, ChemicalFormula.ParseFormula("O-1H-2") }, //"O-1H" -H                { ProductType.yBaseLoss, ChemicalFormula.ParseFormula("O-3H-2P-1") }, //"O-3P-1" -H                { ProductType.zBaseLoss, ChemicalFormula.ParseFormula("O-4H-3P-1") }, //"O-4H-1P-1" -1                { ProductType.M, new ChemicalFormula() }            };        /// <summary>        /// Returns mass shift by product type        /// </summary>        /// <param name="type"></param>        /// <returns></returns>        public static double GetRnaMassShiftFromProductType(this ProductType type) => FragmentIonCaps[type].MonoisotopicMass;        public static FragmentationTerminus GetRnaTerminusType(this ProductType fragmentType)        {            switch (fragmentType)            {                case ProductType.a:                case ProductType.aWaterLoss:                case ProductType.aBaseLoss:
                case ProductType.b:                case ProductType.bWaterLoss:                case ProductType.bBaseLoss:                case ProductType.c:                case ProductType.cWaterLoss:                case ProductType.cBaseLoss:                case ProductType.d:                case ProductType.dWaterLoss:                case ProductType.dBaseLoss:                    return FragmentationTerminus.FivePrime;                case ProductType.w:                case ProductType.wWaterLoss:                case ProductType.wBaseLoss:                case ProductType.x:                case ProductType.xWaterLoss:                case ProductType.xBaseLoss:                case ProductType.y:                case ProductType.yWaterLoss:                case ProductType.yBaseLoss:                case ProductType.z:                case ProductType.zWaterLoss:                case ProductType.zBaseLoss:                    return FragmentationTerminus.ThreePrime;                case ProductType.M:                    return FragmentationTerminus.None;                case ProductType.aStar:                case ProductType.aDegree:                case ProductType.bAmmoniaLoss:                case ProductType.yAmmoniaLoss:                case ProductType.zPlusOne:                case ProductType.D:                case ProductType.Ycore:                case ProductType.Y:                default:                    throw new ArgumentOutOfRangeException(nameof(fragmentType), fragmentType, null);            }        }        /// <summary>        /// Product ion types by Fragmentation Terminus        /// </summary>        private static readonly Dictionary<FragmentationTerminus, List<ProductType>>            ProductIonTypesFromSpecifiedTerminus = new Dictionary<FragmentationTerminus, List<ProductType>>            {                {                    FragmentationTerminus.FivePrime, new List<ProductType>                    {     
                   ProductType.a, ProductType.aWaterLoss, ProductType.aBaseLoss,                        ProductType.b, ProductType.bWaterLoss, ProductType.bBaseLoss,                        ProductType.c, ProductType.cWaterLoss, ProductType.cBaseLoss,                        ProductType.d, ProductType.dWaterLoss, ProductType.dBaseLoss,                     }                },                {                    FragmentationTerminus.ThreePrime, new List<ProductType>                    {                        ProductType.w, ProductType.wWaterLoss, ProductType.wBaseLoss,                        ProductType.x, ProductType.xWaterLoss, ProductType.xBaseLoss,                        ProductType.y, ProductType.yWaterLoss, ProductType.yBaseLoss,                        ProductType.z, ProductType.zWaterLoss, ProductType.zBaseLoss,                    }                },                {                    FragmentationTerminus.Both, new List<ProductType>                    {                        ProductType.a, ProductType.aWaterLoss, ProductType.aBaseLoss,                        ProductType.b, ProductType.bWaterLoss, ProductType.bBaseLoss,                        ProductType.c, ProductType.cWaterLoss, ProductType.cBaseLoss,                        ProductType.d, ProductType.dWaterLoss, ProductType.dBaseLoss,                         ProductType.w, ProductType.wWaterLoss, ProductType.wBaseLoss,                        ProductType.x, ProductType.xWaterLoss, ProductType.xBaseLoss,                        ProductType.y, ProductType.yWaterLoss, ProductType.yBaseLoss,                        ProductType.z, ProductType.zWaterLoss, ProductType.zBaseLoss,                        ProductType.M                    }                }            };        public static List<ProductType> GetRnaTerminusSpecificProductTypes(            this FragmentationTerminus fragmentationTerminus)        {            return 
ProductIonTypesFromSpecifiedTerminus[fragmentationTerminus];        }        /// <summary>        /// Returns all product ion types based upon specified terminus        /// </summary>        /// <param name="dissociationType"></param>        /// <param name="fragmentationTerminus"></param>        /// <returns></returns>        public static List<ProductType> GetRnaTerminusSpecificProductTypesFromDissociation(            this DissociationType dissociationType, FragmentationTerminus fragmentationTerminus)        {            var terminusSpecific = fragmentationTerminus.GetRnaTerminusSpecificProductTypes();            var dissociationSpecific = dissociationType.GetRnaProductTypesFromDissociationType();            return terminusSpecific.Intersect(dissociationSpecific).ToList();        }    }}
\ No newline at end of file
diff --git a/mzLib/Test/Transcriptomics/TestDigestion.cs b/mzLib/Test/Transcriptomics/TestDigestion.cs
new file mode 100644
index 000000000..6b385be0f
--- /dev/null
+++ b/mzLib/Test/Transcriptomics/TestDigestion.cs
@@ -0,0 +1,780 @@
+﻿using System;
+using System.Collections.Generic;
+using System.Diagnostics.CodeAnalysis;
+using System.IO;
+using System.Linq;
+using System.Security.Cryptography;
+using Chemistry;
+using MassSpectrometry;
+using MathNet.Numerics.Distributions;
+using NUnit.Framework;
+using Omics.Digestion;
+using Omics.Fragmentation;
+using Omics.Modifications;
+using Transcriptomics;
+using Transcriptomics.Digestion;
+using UsefulProteomicsDatabases;
+
+namespace Test.Transcriptomics
+{
+    [TestFixture]
+    [ExcludeFromCodeCoverage]
+    public class TestDigestion
+    {
+        public record RnaDigestionTestCase(string BaseSequence, string Enzyme, int MissedCleavages, int MinLength, // sequence to digest, key into RnaseDictionary.Dictionary, digestion arguments
+            int MaxLength, int DigestionProductCount, // last digestion argument; number of products the digestion should return
+            double[] MonoMasses, string[] Sequences); // Sequences: expected base sequences in product order; MonoMasses: presumably the matching monoisotopic masses (not asserted by the tests shown here)
+
+        public static IEnumerable<RnaDigestionTestCase> GetTestCases() => new RnaDigestionTestCase[]
+        {
+            // 6-mer, top-down: the intact sequence is the only product
+            new(BaseSequence: "GUACUG", Enzyme: "top-down",
+                MissedCleavages: 0, MinLength: 1, MaxLength: 6, DigestionProductCount: 1,
+                MonoMasses: new[] { 1874.28 },
+                Sequences: new[] { "GUACUG" }),
+            // 6-mer, RNase T1, no missed cleavages
+            new(BaseSequence: "GUACUG", Enzyme: "RNase T1",
+                MissedCleavages: 0, MinLength: 1, MaxLength: 6, DigestionProductCount: 2,
+                MonoMasses: new[] { 363.057, 1529.234 },
+                Sequences: new[] { "G", "UACUG" }),
+            // 6-mer, Cusativin, no missed cleavages
+            new(BaseSequence: "GUACUG", Enzyme: "Cusativin",
+                MissedCleavages: 0, MinLength: 1, MaxLength: 6, DigestionProductCount: 2,
+                MonoMasses: new[] { 1303.175, 589.116 },
+                Sequences: new[] { "GUAC", "UG" }),
+            // 6-mer, RNase T1, minimum length 3 leaves out the single-residue product
+            new(BaseSequence: "GUACUG", Enzyme: "RNase T1",
+                MissedCleavages: 0, MinLength: 3, MaxLength: 6, DigestionProductCount: 1,
+                MonoMasses: new[] { 1529.234 },
+                Sequences: new[] { "UACUG" }),
+            // 6-mer, RNase T1, maximum length 2 leaves out the five-residue product
+            new(BaseSequence: "GUACUG", Enzyme: "RNase T1",
+                MissedCleavages: 0, MinLength: 1, MaxLength: 2, DigestionProductCount: 1,
+                MonoMasses: new[] { 363.057 },
+                Sequences: new[] { "G" }),
+            // 6-mer, RNase T1, up to one missed cleavage
+            new(BaseSequence: "GUACUG", Enzyme: "RNase T1",
+                MissedCleavages: 1, MinLength: 1, MaxLength: 6, DigestionProductCount: 3,
+                MonoMasses: new[] { 363.057, 1529.234, 1874.28 },
+                Sequences: new[] { "G", "UACUG", "GUACUG" }),
+            // 6-mer, RNase A, no missed cleavages
+            new(BaseSequence: "GUACUG", Enzyme: "RNase A",
+                MissedCleavages: 0, MinLength: 1, MaxLength: 6, DigestionProductCount: 4,
+                MonoMasses: new[] { 669.082, 652.103, 324.035, 283.091 },
+                Sequences: new[] { "GU", "AC", "U", "G" }),
+            // 6-mer, RNase A, up to one missed cleavage
+            new(BaseSequence: "GUACUG", Enzyme: "RNase A",
+                MissedCleavages: 1, MinLength: 1, MaxLength: 6, DigestionProductCount: 7,
+                MonoMasses: new[] { 669.082, 652.103, 324.035, 283.091, 1303.175, 958.128, 589.116 },
+                Sequences: new[] { "GU", "AC", "U", "G", "GUAC", "ACU", "UG" }),
+            // 6-mer, RNase A, up to two missed cleavages
+            new(BaseSequence: "GUACUG", Enzyme: "RNase A",
+                MissedCleavages: 2, MinLength: 1, MaxLength: 6, DigestionProductCount: 9,
+                MonoMasses: new[] { 669.082, 652.103, 324.035, 283.091, 1303.175, 958.128, 589.116, 1609.200, 1223.209 },
+                Sequences: new[] { "GU", "AC", "U", "G", "GUAC", "ACU", "UG", "GUACU", "ACUG" }),
+            // 20-mer, top-down: the intact sequence is the only product
+            new(BaseSequence: "GUACUGCCUCUAGUGAAGCA", Enzyme: "top-down",
+                MissedCleavages: 0, MinLength: 1, MaxLength: int.MaxValue, DigestionProductCount: 1,
+                MonoMasses: new[] { 6363.871 },
+                Sequences: new[] { "GUACUGCCUCUAGUGAAGCA" }),
+            // 20-mer, RNase T1, no missed cleavages
+            new(BaseSequence: "GUACUGCCUCUAGUGAAGCA", Enzyme: "RNase T1",
+                MissedCleavages: 0, MinLength: 1, MaxLength: int.MaxValue, DigestionProductCount: 6,
+                MonoMasses: new[] { 363.057, 1609.200, 2219.282, 669.082, 1021.161, 572.137 },
+                Sequences: new[] { "G", "UACUG", "CCUCUAG", "UG", "AAG", "CA" }),
+        };
+
+        public static string rnaseTsvpath = Path.Combine(TestContext.CurrentContext.TestDirectory, "Digestion", "rnases.tsv"); // segments passed separately: no hard-coded '\' separator, so the path also resolves on non-Windows hosts
+
+        // Fixture-level setup: replaces RnaseDictionary.Dictionary with the entries
+        // loaded from the tsv at rnaseTsvpath, which the tests then look up by enzyme name.
+        [OneTimeSetUp]
+        public void OneTimeSetup() =>
+            RnaseDictionary.Dictionary = RnaseDictionary.LoadRnaseDictionary(rnaseTsvpath);
+
+        #region Rnase
+
+        [Test]
+        public void TestRnaseDictionaryLoading()
+        {
+            int expectedEntryCount = File.ReadAllLines(rnaseTsvpath).Length - 1; // every line but one (presumably a header row)
+            Assert.That(RnaseDictionary.Dictionary.Count, Is.EqualTo(expectedEntryCount));
+        }
+
+        // The number of digestion products must equal testCase.DigestionProductCount.
+        [Test]
+        [TestCaseSource(nameof(GetTestCases))]
+        public void TestRnase_GetUnmodifiedOligos_Counts(RnaDigestionTestCase testCase)
+        {
+            RNA rna = new RNA(testCase.BaseSequence);
+            Rnase rnase = RnaseDictionary.Dictionary[testCase.Enzyme];
+            var digestionProducts =
+                rnase.GetUnmodifiedOligos(rna, testCase.MissedCleavages, testCase.MinLength, testCase.MaxLength);
+            Assert.That(digestionProducts.Count, Is.EqualTo(testCase.DigestionProductCount)); // Count property, as in the sibling sequence test; no LINQ Count() call needed
+        }
+
+        // Products must come back in the same order and with the same base sequences as testCase.Sequences.
+        [Test]
+        [TestCaseSource(nameof(GetTestCases))]
+        public void TestRnase_GetUnmodifiedOligo_Sequence(RnaDigestionTestCase testCase)
+        {
+            RNA rna = new RNA(testCase.BaseSequence);
+            Rnase rnase = RnaseDictionary.Dictionary[testCase.Enzyme];
+            var digestionProducts =
+                rnase.GetUnmodifiedOligos(rna, testCase.MissedCleavages, testCase.MinLength, testCase.MaxLength);
+
+            Assert.That(digestionProducts.Count, Is.EqualTo(testCase.Sequences.Length));
+            for (var i = 0; i < digestionProducts.Count; i++)
+            {
+                // Is.EqualTo (rather than a bare bool) makes a failure print the expected and the actual sequence
+                Assert.That(digestionProducts[i].BaseSequence, Is.EqualTo(testCase.Sequences[i]), $"product index {i}");
+            }
+        }
+
+        [Test]
+        public void TestRnaseEqualityProperties()
+        {
+            Rnase rnaseT1 = RnaseDictionary.Dictionary["RNase T1"];
+            Rnase sameKeyLookup = RnaseDictionary.Dictionary["RNase T1"];
+            Rnase rnaseT2 = RnaseDictionary.Dictionary["RNase T2"];
+            // Equals and GetHashCode first with Rnase-typed arguments, then the same Equals checks with arguments cast to object
+            Assert.That(rnaseT1.Equals(sameKeyLookup), Is.True);
+            Assert.That(rnaseT1.Equals(rnaseT1), Is.True);
+            Assert.That(rnaseT1.Equals(rnaseT2), Is.False);
+            Assert.That(rnaseT1.Equals(null), Is.False);
+            Assert.That(rnaseT1.GetHashCode(), Is.EqualTo(sameKeyLookup.GetHashCode()));
+            Assert.That(rnaseT1.GetHashCode(), Is.Not.EqualTo(rnaseT2.GetHashCode()));
+            Assert.That(rnaseT1.Equals((object)sameKeyLookup), Is.True);
+            Assert.That(rnaseT1.Equals((object)rnaseT1), Is.True);
+            Assert.That(rnaseT1.Equals((object)rnaseT2), Is.False);
+            Assert.That(rnaseT1.Equals((object)null), Is.False);
+            // ReSharper disable once SuspiciousTypeConversion.Global
+            Assert.That(rnaseT1.Equals((object)new RNA("GUA")), Is.False);
+        }
+
+        [Test] // digesting with an Rnase built with CleavageSpecificity.SingleC and no motifs must throw
+        public void TestRnase_UnmodifiedOligos_Exception()
+        {
+            var unsupported = new Rnase("Bad", CleavageSpecificity.SingleC, new List<DigestionMotif>());
+            Assert.Throws<ArgumentException>(() => unsupported.GetUnmodifiedOligos(new RNA("GUACUG"), 0, 1, 6));
+        }
+
+        #endregion
+
+        #region NucleolyticOligo
+
+        [Test]
+        public void TestNucleolyticOligoProperties_FivePrimeDigestionProduct()
+        {
+            Rnase rnaseU2 = RnaseDictionary.Dictionary["RNase U2"];
+            var oligos = rnaseU2.GetUnmodifiedOligos(new RNA("GUACUG"), 0, 1, 6);
+            Assert.That(oligos.Count, Is.EqualTo(3));
+
+            // first product: starts at residue 1 and reports '-' as its previous residue
+            var fivePrimeOligo = oligos[0];
+            Assert.That(fivePrimeOligo.BaseSequence, Is.EqualTo("G"));
+            Assert.That(fivePrimeOligo.OneBasedStartResidue, Is.EqualTo(1));
+            Assert.That(fivePrimeOligo.OneBasedEndResidue, Is.EqualTo(1));
+            Assert.That(fivePrimeOligo.MissedCleavages, Is.EqualTo(0));
+            Assert.That(fivePrimeOligo.CleavageSpecificityForFdrCategory, Is.EqualTo(CleavageSpecificity.Full));
+            Assert.That(fivePrimeOligo.NextResidue, Is.EqualTo('U'));
+            Assert.That(fivePrimeOligo.PreviousResidue, Is.EqualTo('-'));
+            Assert.That(fivePrimeOligo.ToString(), Is.EqualTo(fivePrimeOligo.BaseSequence));
+        }
+
+        [Test]
+        public void TestNucleolyticOligoProperties_ThreePrimeDigestionProduct()
+        {
+            Rnase rnaseU2 = RnaseDictionary.Dictionary["RNase U2"];
+            var oligos = rnaseU2.GetUnmodifiedOligos(new RNA("GUACUG"), 0, 1, 6);
+            Assert.That(oligos.Count, Is.EqualTo(3));
+
+            // last product: ends at residue 6 and reports '-' as its next residue
+            var threePrimeOligo = oligos[2];
+            Assert.That(threePrimeOligo.BaseSequence, Is.EqualTo("CUG"));
+            Assert.That(threePrimeOligo.OneBasedStartResidue, Is.EqualTo(4));
+            Assert.That(threePrimeOligo.OneBasedEndResidue, Is.EqualTo(6));
+            Assert.That(threePrimeOligo.MissedCleavages, Is.EqualTo(0));
+            Assert.That(threePrimeOligo.CleavageSpecificityForFdrCategory, Is.EqualTo(CleavageSpecificity.Full));
+            Assert.That(threePrimeOligo.NextResidue, Is.EqualTo('-'));
+            Assert.That(threePrimeOligo.PreviousResidue, Is.EqualTo('A'));
+            Assert.That(threePrimeOligo.ToString(), Is.EqualTo(threePrimeOligo.BaseSequence));
+        }
+
+        [Test]
+        public void TestNucleolyticOligoProperties_InternalDigestionProduct()
+        {
+            Rnase rnaseU2 = RnaseDictionary.Dictionary["RNase U2"];
+            var oligos = rnaseU2.GetUnmodifiedOligos(new RNA("GUACUG"), 0, 1, 6);
+            Assert.That(oligos.Count, Is.EqualTo(3));
+
+            // middle product: has a real residue on both sides ('G' before, 'C' after)
+            var internalOligo = oligos[1];
+            Assert.That(internalOligo.BaseSequence, Is.EqualTo("UA"));
+            Assert.That(internalOligo.OneBasedStartResidue, Is.EqualTo(2));
+            Assert.That(internalOligo.OneBasedEndResidue, Is.EqualTo(3));
+            Assert.That(internalOligo.MissedCleavages, Is.EqualTo(0));
+            Assert.That(internalOligo.CleavageSpecificityForFdrCategory, Is.EqualTo(CleavageSpecificity.Full));
+            Assert.That(internalOligo.NextResidue, Is.EqualTo('C'));
+            Assert.That(internalOligo.PreviousResidue, Is.EqualTo('G'));
+            Assert.That(internalOligo.ToString(), Is.EqualTo(internalOligo.BaseSequence));
+        }
+
+        [Test]
+        public void TestNucleolyticOligoProperties_TopDownDigestionProduct()
+        {
+            Rnase topDown = RnaseDictionary.Dictionary["top-down"];
+            var oligos = topDown.GetUnmodifiedOligos(new RNA("GUACUG"), 0, 1, 6);
+            Assert.That(oligos.Count, Is.EqualTo(1));
+
+            // the single product spans the whole sequence, so both flanking residues are '-'
+            var intactOligo = oligos[0];
+            Assert.That(intactOligo.BaseSequence, Is.EqualTo("GUACUG"));
+            Assert.That(intactOligo.OneBasedStartResidue, Is.EqualTo(1));
+            Assert.That(intactOligo.OneBasedEndResidue, Is.EqualTo(6));
+            Assert.That(intactOligo.MissedCleavages, Is.EqualTo(0));
+            Assert.That(intactOligo.CleavageSpecificityForFdrCategory, Is.EqualTo(CleavageSpecificity.Full));
+            Assert.That(intactOligo.NextResidue, Is.EqualTo('-'));
+            Assert.That(intactOligo.PreviousResidue, Is.EqualTo('-'));
+            Assert.That(intactOligo.ToString(), Is.EqualTo(intactOligo.BaseSequence));
+        }
+
+        #endregion
+
+        #region OligoWithSetMods
+
+        // Expected CID fragments (sequence, fragment number, product type, mass) for each RNase T1 digestion product
+        private static (string Sequence, int FragmentNumber, ProductType Type, double Mass)[] DigestFragmentTestCases => new[]
+            {
+                ("UAG", 0, ProductType.M, 998.134),
+                ("UAG", 1, ProductType.aBaseLoss, 114.031), ("UAG", 2, ProductType.aBaseLoss, 420.056),
+                ("UAG", 1, ProductType.c, 308.041), ("UAG", 2, ProductType.c, 637.093), // c1 of the leading U is shared by UAG/UCG/UUG
+                ("UAG", 1, ProductType.dWaterLoss, 306.025), ("UAG", 2, ProductType.dWaterLoss, 635.077),
+                ("UAG", 1, ProductType.w, 443.023), ("UAG", 2, ProductType.w, 772.075),
+                ("UAG", 1, ProductType.y, 363.057), ("UAG", 2, ProductType.y, 692.109),
+                ("UAG", 1, ProductType.yWaterLoss, 345.047), ("UAG", 2, ProductType.yWaterLoss, 674.100),
+
+                ("UCG", 0, ProductType.M, 974.123),
+                ("UCG", 1, ProductType.aBaseLoss, 114.031), ("UCG", 2, ProductType.aBaseLoss, 420.056),
+                ("UCG", 1, ProductType.c, 308.041), ("UCG", 2, ProductType.c, 613.082),
+                ("UCG", 1, ProductType.dWaterLoss, 306.025), ("UCG", 2, ProductType.dWaterLoss, 611.066),
+                ("UCG", 1, ProductType.w, 443.023), ("UCG", 2, ProductType.w, 748.064),
+                ("UCG", 1, ProductType.y, 363.057), ("UCG", 2, ProductType.y, 668.098),
+                ("UCG", 1, ProductType.yWaterLoss, 345.047), ("UCG", 2, ProductType.yWaterLoss, 650.089),
+
+                ("UUG", 0, ProductType.M, 975.107),
+                ("UUG", 1, ProductType.aBaseLoss, 114.031), ("UUG", 2, ProductType.aBaseLoss, 420.056),
+                ("UUG", 1, ProductType.c, 308.041), ("UUG", 2, ProductType.c, 614.066),
+                ("UUG", 1, ProductType.dWaterLoss, 306.025), ("UUG", 2, ProductType.dWaterLoss, 612.050),
+                ("UUG", 1, ProductType.w, 443.023), ("UUG", 2, ProductType.w, 749.048),
+                ("UUG", 1, ProductType.y, 363.057), ("UUG", 2, ProductType.y, 669.082),
+                ("UUG", 1, ProductType.yWaterLoss, 345.047), ("UUG", 2, ProductType.yWaterLoss, 651.073),
+
+                ("AUAG", 0, ProductType.M, 1247.220),
+                ("AUAG", 1, ProductType.aBaseLoss, 114.031), ("AUAG", 2, ProductType.aBaseLoss, 443.083), ("AUAG", 3, ProductType.aBaseLoss, 749.108),
+                ("AUAG", 1, ProductType.c, 331.068), ("AUAG", 2, ProductType.c, 637.093), ("AUAG", 3, ProductType.c, 966.146),
+                ("AUAG", 1, ProductType.dWaterLoss, 329.052), ("AUAG", 2, ProductType.dWaterLoss, 635.077), ("AUAG", 3, ProductType.dWaterLoss, 964.129),
+                ("AUAG", 1, ProductType.w, 363.057), ("AUAG", 2, ProductType.w, 692.109), ("AUAG", 3, ProductType.w, 998.134),
+                ("AUAG", 1, ProductType.y, 283.091), ("AUAG", 2, ProductType.y, 612.143), ("AUAG", 3, ProductType.y, 918.168),
+                ("AUAG", 1, ProductType.yWaterLoss, 265.081), ("AUAG", 2, ProductType.yWaterLoss, 594.134), ("AUAG", 3, ProductType.yWaterLoss, 900.159),
+            };
+
+        [Test] // test values calculated with http://rna.rega.kuleuven.be/masspec/mongo.htm
+        [TestCase("UAGUCGUUGAUAG", 4140.555, new[] { "UAG", "UCG", "UUG", "AUAG" },
+            new[] { 998.134, 974.123, 975.107, 1247.220 })]
+        public static void TestDigestionAndFragmentation(string sequence, double monoMass,
+            string[] digestionProductSequences, double[] digestionProductMasses)
+        {
+            RNA rna = new(sequence);
+            Assert.That(rna.MonoisotopicMass, Is.EqualTo(monoMass).Within(0.01));
+
+            // digest RNA
+            var digestionParams = new RnaDigestionParams("RNase T1");
+            var products = rna.Digest(digestionParams, new List<Modification>(), new List<Modification>())
+                .Select(p => (OligoWithSetMods)p).ToList();
+            Assert.That(products.Count, Is.EqualTo(digestionProductSequences.Length));
+
+            // ensure digestion sequence and masses are correct
+            for (var index = 0; index < products.Count; index++)
+            {
+                var digestionProduct = products[index];
+                Assert.That(digestionProduct.BaseSequence, Is.EqualTo(digestionProductSequences[index]));
+                Assert.That(digestionProduct.MonoisotopicMass, Is.EqualTo(digestionProductMasses[index]).Within(0.01));
+
+                List<Product> fragments = new();
+                digestionProduct.Fragment(DissociationType.CID, FragmentationTerminus.Both, fragments);
+
+                // test that fragments are correct; the count is pinned first so that missing or extra
+                // fragments fail here instead of being skipped (or overrunning) in the indexed loop below
+                var fragmentsToCompare = DigestFragmentTestCases
+                    .Where(p => p.Sequence.Equals(digestionProduct.BaseSequence)).ToList();
+                Assert.That(fragments.Count, Is.EqualTo(fragmentsToCompare.Count));
+                for (var i = 0; i < fragments.Count; i++)
+                {
+                    var fragment = fragments[i];
+                    var theoreticalFragment = fragmentsToCompare[i];
+                    Assert.That(fragment.MonoisotopicMass, Is.EqualTo(theoreticalFragment.Mass).Within(0.01));
+                    Assert.That(fragment.FragmentNumber, Is.EqualTo(theoreticalFragment.FragmentNumber));
+                    Assert.That(fragment.ProductType, Is.EqualTo(theoreticalFragment.Type));
+                    // 5' fragments are positioned from the start of the product, other terminal fragments from its end
+                    if (fragment.Terminus == FragmentationTerminus.FivePrime)
+                        Assert.That(fragment.AminoAcidPosition, Is.EqualTo(theoreticalFragment.FragmentNumber));
+                    else if (fragment.Terminus == FragmentationTerminus.None)
+                        Assert.That(fragment.FragmentNumber, Is.EqualTo(0));
+                    else
+                        Assert.That(fragment.AminoAcidPosition, Is.EqualTo(digestionProductSequences[index].Length - theoreticalFragment.FragmentNumber));
+                }
+            }
+        }
+
+        [Test]
+        [TestCase("UAGUCGUUGAUAG", new[] { "UAG", "UCG", "UUG", "AUAG" }, new[] { 1, 4, 7, 10 }, new[] { 3, 6, 9, 13 },
+            new[] { '-', 'G', 'G', 'G' }, new[] { 'U', 'U', 'A', '-' })]
+        public static void TestOligoWithSetMods_AAPositions(string sequence, string[] digestionProductSequences,
+            int[] startResidue, int[] endResidue, char[] preciousResidue, char[] nextResidue)
+        {
+            RNA rna = new RNA(sequence);
+            var digestionProducts = rna.Digest(new RnaDigestionParams("RNase T1"), new List<Modification>(),
+                new List<Modification>()).Select(p => (OligoWithSetMods)p).ToList();
+            // pin the product count first: otherwise an empty or truncated digest would pass the checks below vacuously
+            Assert.That(digestionProducts.Count, Is.EqualTo(digestionProductSequences.Length));
+            Assert.That(digestionProducts.All(p => p.DigestionParams.DigestionAgent.Name == "RNase T1"));
+            for (var index = 0; index < digestionProducts.Count; index++)
+            {
+                var digestionProduct = digestionProducts[index];
+                Assert.That(digestionProduct.BaseSequence, Is.EqualTo(digestionProductSequences[index]));
+                Assert.That(digestionProduct.OneBasedStartResidue, Is.EqualTo(startResidue[index]));
+                Assert.That(digestionProduct.OneBasedEndResidue, Is.EqualTo(endResidue[index]));
+                Assert.That(digestionProduct.PreviousResidue, Is.EqualTo(preciousResidue[index]));
+                Assert.That(digestionProduct.NextResidue, Is.EqualTo(nextResidue[index]));
+            }
+        }
+
+        [Test]
+        public static void TestTermini_ThreePrimeCyclicPhosphate()
+        {
+            RNA rna = new RNA("UAGUCGUUGAUAG");
+            var oligoCyclicPhosphate = PtmListLoader.ReadModsFromString(
+                "ID   Cyclic Phosphate\r\nTG   X\r\nPP   Oligo 3'-terminal.\r\nMT   Digestion Termini\r\nCF   H-2 O-1\r\nDR   Unimod; 280.\r\n//",
+                out List<(Modification, string)> errors).First();
+            // checked after each parse: the second call below overwrites the same out variable
+            Assert.That(!errors.Any());
+            var nucleicAcidCyclicPhosphate = PtmListLoader.ReadModsFromString(
+                "ID   Cyclic Phosphate\r\nTG   X\r\nPP   3'-terminal.\r\nMT   Digestion Termini\r\nCF   H-2 O-1\r\nDR   Unimod; 280.\r\n//",
+                out errors).First();
+            Assert.That(!errors.Any());
+
+            // top-down digestion, 3' terminal modification
+            var variableMods = new List<Modification> { nucleicAcidCyclicPhosphate };
+            var digestionParams = new RnaDigestionParams("top-down");
+            var digestionProducts = rna.Digest(digestionParams, new List<Modification>(), variableMods)
+                .Select(p => (OligoWithSetMods)p).ToList();
+            Assert.That(digestionProducts.Count, Is.EqualTo(2));
+            Assert.That(digestionProducts[0].FullSequence, Is.EqualTo("UAGUCGUUGAUAG"));
+            Assert.That(digestionProducts[1].FullSequence, Is.EqualTo("UAGUCGUUGAUAG[Digestion Termini:Cyclic Phosphate on X]"));
+
+            // top-down digestion, 3' oligo terminal modification
+            variableMods = new List<Modification> { oligoCyclicPhosphate };
+            digestionProducts = rna.Digest(digestionParams, new List<Modification>(), variableMods)
+                .Select(p => (OligoWithSetMods)p).ToList();
+            Assert.That(digestionProducts.Count, Is.EqualTo(1));
+            Assert.That(digestionProducts[0].FullSequence, Is.EqualTo("UAGUCGUUGAUAG"));
+
+            // RNase T1 digestion, 3' terminal modification
+            digestionParams = new RnaDigestionParams("RNase T1");
+            variableMods = new List<Modification> { nucleicAcidCyclicPhosphate };
+            digestionProducts = rna.Digest(digestionParams, new List<Modification>(), variableMods)
+                .Select(p => (OligoWithSetMods)p).ToList();
+            Assert.That(digestionProducts.Count, Is.EqualTo(5));
+            var expected = new List<string>()
+            {
+                "UAG", "UCG", "UUG", "AUAG", "AUAG[Digestion Termini:Cyclic Phosphate on X]"
+            };
+            for (int i = 0; i < expected.Count; i++)
+            {
+                Assert.That(digestionProducts[i].FullSequence, Is.EqualTo(expected[i]));
+            }
+
+            // RNase T1 digestion, 3' oligo terminal modification
+            variableMods = new List<Modification> { oligoCyclicPhosphate };
+            digestionProducts = rna.Digest(digestionParams, new List<Modification>(), variableMods)
+                .Select(p => (OligoWithSetMods)p).ToList();
+            Assert.That(digestionProducts.Count, Is.EqualTo(7));
+            expected = new List<string>()
+            {
+                "UAG", "UAG[Digestion Termini:Cyclic Phosphate on X]",
+                "UCG", "UCG[Digestion Termini:Cyclic Phosphate on X]",
+                "UUG", "UUG[Digestion Termini:Cyclic Phosphate on X]",
+                "AUAG",
+            };
+            for (int i = 0; i < expected.Count; i++)
+            {
+                Assert.That(digestionProducts[i].FullSequence, Is.EqualTo(expected[i]));
+            }
+        }
+
+        [Test]
+        public static void TestTermini_FivePrimeLargeMod()
+        {
+            RNA rna = new RNA("UAGUCGUUGAUAG");
+            var oligoLargeMod = PtmListLoader.ReadModsFromString(
+                "ID   Pfizer 5'-Cap\r\nTG   X\r\nPP   Oligo 5'-terminal.\r\nMT   Standard\r\nCF   C13H22N5O14P3\r\nDR   Unimod; 280.\r\n//",
+                out List<(Modification, string)> errors).First();
+            // checked after each parse: the second call below overwrites the same out variable
+            Assert.That(!errors.Any());
+            var nucleicAcidLargeMod = PtmListLoader.ReadModsFromString(
+                "ID   Pfizer 5'-Cap\r\nTG   X\r\nPP   5'-terminal.\r\nMT   Standard\r\nCF   C13H22N5O14P3\r\nDR   Unimod; 280.\r\n//",
+                out errors).First();
+            Assert.That(!errors.Any());
+
+            // top-down digestion, 5' terminal modification
+            var variableMods = new List<Modification> { nucleicAcidLargeMod };
+            var digestionParams = new RnaDigestionParams("top-down");
+            var digestionProducts = rna.Digest(digestionParams, new List<Modification>(), variableMods)
+                .Select(p => (OligoWithSetMods)p).ToList();
+            Assert.That(digestionProducts.Count, Is.EqualTo(2));
+            Assert.That(digestionProducts[0].FullSequence, Is.EqualTo("UAGUCGUUGAUAG"));
+            Assert.That(digestionProducts[1].FullSequence, Is.EqualTo("[Standard:Pfizer 5'-Cap on X]UAGUCGUUGAUAG"));
+
+            // top-down digestion, 5' oligo terminal modification
+            variableMods = new List<Modification> { oligoLargeMod };
+            digestionProducts = rna.Digest(digestionParams, new List<Modification>(), variableMods)
+                .Select(p => (OligoWithSetMods)p).ToList();
+            Assert.That(digestionProducts.Count, Is.EqualTo(1));
+            Assert.That(digestionProducts[0].FullSequence, Is.EqualTo("UAGUCGUUGAUAG"));
+
+            // RNase T1 digestion, 5' terminal modification
+            digestionParams = new RnaDigestionParams("RNase T1");
+            variableMods = new List<Modification> { nucleicAcidLargeMod };
+            digestionProducts = rna.Digest(digestionParams, new List<Modification>(), variableMods)
+                .Select(p => (OligoWithSetMods)p).ToList();
+            Assert.That(digestionProducts.Count, Is.EqualTo(5));
+            var expected = new List<string>()
+            {
+                "UAG", "[Standard:Pfizer 5'-Cap on X]UAG", "UCG", "UUG", "AUAG"
+            };
+            for (int i = 0; i < expected.Count; i++)
+            {
+                Assert.That(digestionProducts[i].FullSequence, Is.EqualTo(expected[i]));
+            }
+
+            // RNase T1 digestion, 5' oligo terminal modification
+            variableMods = new List<Modification> { oligoLargeMod };
+            digestionProducts = rna.Digest(digestionParams, new List<Modification>(), variableMods)
+                .Select(p => (OligoWithSetMods)p).ToList();
+            Assert.That(digestionProducts.Count, Is.EqualTo(7));
+            expected = new List<string>()
+            {
+                "UAG",
+                "UCG", "[Standard:Pfizer 5'-Cap on X]UCG",
+                "UUG", "[Standard:Pfizer 5'-Cap on X]UUG",
+                "AUAG", "[Standard:Pfizer 5'-Cap on X]AUAG"
+            };
+            for (int i = 0; i < expected.Count; i++)
+            {
+                Assert.That(digestionProducts[i].FullSequence, Is.EqualTo(expected[i]));
+            }
+        }
+
+        [Test]
+        [TestCase("UAGUCGUUGAUAG")]
+        public static void TestOligoWithSetMods_PropertiesWithTopDownDigestion(string sequence)
+        {
+            var rna = new RNA(sequence);
+            // direct cast (as in the other digestion tests) instead of hand-throwing NullReferenceException
+            var oligoWithSetMods = (OligoWithSetMods)rna
+                .Digest(new RnaDigestionParams(), new List<Modification>(), new List<Modification>()).First();
+            // the digestion product is the value under test, so it is the "actual" side of every assertion
+            Assert.That(oligoWithSetMods.BaseSequence, Is.EqualTo(rna.BaseSequence));
+            Assert.That(oligoWithSetMods.ThreePrimeTerminus, Is.EqualTo(rna.ThreePrimeTerminus));
+            Assert.That(oligoWithSetMods.FivePrimeTerminus, Is.EqualTo(rna.FivePrimeTerminus));
+            Assert.That(oligoWithSetMods.ThisChemicalFormula, Is.EqualTo(rna.ThisChemicalFormula));
+            Assert.That(oligoWithSetMods.Length, Is.EqualTo(rna.Length));
+        }
+
+        [Test]
+        public static void OligoWithSetMods_CalculatedValues()
+        {
+            var rna = new RNA("GUACUG");
+            var rnaFormula = rna.ThisChemicalFormula;
+
+            string modText = "ID   Sodium\r\nMT   Metal\r\nPP   Anywhere.\r\nTG   A\r\nCF   Na1H-1\r\n" + @"//";
+            var sodiumAdduct = PtmListLoader.ReadModsFromString(modText, out List<(Modification, string)> mods).First();
+            // direct cast (as in the other digestion tests) instead of hand-throwing NullReferenceException
+            var oligoWithSetMods = (OligoWithSetMods)rna
+                .Digest(new RnaDigestionParams(), new List<Modification>() { sodiumAdduct }, new List<Modification>()).First();
+
+            Assert.That(oligoWithSetMods.NumMods, Is.EqualTo(1));
+            Assert.That(oligoWithSetMods.NumFixedMods, Is.EqualTo(1));
+            Assert.That(oligoWithSetMods.NumVariableMods, Is.EqualTo(0));
+
+            var formula = oligoWithSetMods.ThisChemicalFormula;
+            Assert.That(formula, Is.EqualTo(rnaFormula + sodiumAdduct.ChemicalFormula));
+            // extending a terminus by one hydrogen must shift both masses by its mass and add H to the formula
+            var formulaToAdd = ChemicalFormula.ParseFormula("H");
+            var deltaMass = formulaToAdd.MonoisotopicMass;
+            var oldMonoMass = oligoWithSetMods.MonoisotopicMass;
+            var oldMostAbundantMass = oligoWithSetMods.MostAbundantMonoisotopicMass;
+
+            oligoWithSetMods.FivePrimeTerminus = formulaToAdd + oligoWithSetMods.FivePrimeTerminus;
+
+            Assert.That(oligoWithSetMods.MonoisotopicMass, Is.EqualTo(oldMonoMass + deltaMass).Within(0.01));
+            Assert.That(oligoWithSetMods.MostAbundantMonoisotopicMass, Is.EqualTo(oldMostAbundantMass + deltaMass).Within(0.01));
+            Assert.That(oligoWithSetMods.ThisChemicalFormula, Is.EqualTo(formula + formulaToAdd));
+
+            oldMonoMass = oligoWithSetMods.MonoisotopicMass;
+            oldMostAbundantMass = oligoWithSetMods.MostAbundantMonoisotopicMass;
+            oligoWithSetMods.ThreePrimeTerminus = formulaToAdd + oligoWithSetMods.ThreePrimeTerminus;
+
+            Assert.That(oligoWithSetMods.MonoisotopicMass, Is.EqualTo(oldMonoMass + deltaMass).Within(0.01));
+            Assert.That(oligoWithSetMods.MostAbundantMonoisotopicMass, Is.EqualTo(oldMostAbundantMass + deltaMass).Within(0.01));
+            Assert.That(oligoWithSetMods.ThisChemicalFormula, Is.EqualTo(formula + formulaToAdd + formulaToAdd));
+        }
+
+        #endregion
+
+        #region DigestionParams
+
+        [Test]
+        [TestCaseSource(nameof(GetTestCases))]
+        public void TestDigestionParams_Properties(RnaDigestionTestCase testCase)
+        {
+            // constructor arguments must be reflected by the corresponding properties
+            RnaDigestionParams parameters = new(testCase.Enzyme, testCase.MissedCleavages, testCase.MinLength,
+                testCase.MaxLength);
+            Assert.That(parameters.DigestionAgent, Is.EqualTo(RnaseDictionary.Dictionary[testCase.Enzyme]));
+            Assert.That(parameters.MaxMissedCleavages, Is.EqualTo(testCase.MissedCleavages));
+            Assert.That(parameters.MinLength, Is.EqualTo(testCase.MinLength));
+            Assert.That(parameters.MaxLength, Is.EqualTo(testCase.MaxLength));
+
+            // settable limits must return the values assigned to them
+            parameters.MaxModificationIsoforms = 2048;
+            parameters.MaxMods = 3;
+            Assert.That(parameters.MaxModificationIsoforms, Is.EqualTo(2048));
+            Assert.That(parameters.MaxMods, Is.EqualTo(3));
+            var products = new RNA(testCase.BaseSequence)
+                .Digest(parameters, new List<Modification>(), new List<Modification>());
+            Assert.That(products.Count(), Is.EqualTo(testCase.DigestionProductCount));
+        }
+
+        #endregion
+
+        #region NucleicAcid
+
+
+        [Test]
+        [TestCaseSource(nameof(GetTestCases))]
+        public void TestNucleicAcid_Digestion_WithoutMods_Counts(RnaDigestionTestCase testCase)
+        {
+            // digest with empty fixed and variable modification lists and count the products
+            var nucleicAcid = new RNA(testCase.BaseSequence);
+            RnaDigestionParams parameters = new(testCase.Enzyme, testCase.MissedCleavages, testCase.MinLength,
+                testCase.MaxLength);
+            int productCount = nucleicAcid.Digest(parameters, new List<Modification>(), new List<Modification>()).Count();
+            Assert.That(productCount, Is.EqualTo(testCase.DigestionProductCount));
+        }
+
+        [Test]
+        [TestCaseSource(nameof(GetTestCases))]
+        public void TestNucleicAcid_Digestion_WithoutMods_Sequences(RnaDigestionTestCase testCase)
+        {
+            // digest the test-case sequence with empty fixed and variable modification lists
+            var nucleicAcid = new RNA(testCase.BaseSequence);
+            RnaDigestionParams parameters = new(testCase.Enzyme, testCase.MissedCleavages, testCase.MinLength,
+                testCase.MaxLength);
+            var oligos = nucleicAcid.Digest(parameters, new List<Modification>(), new List<Modification>()).ToList();
+
+            // with no modifications applied, base and full sequence are both compared to the same expected string
+            Assert.That(oligos.Count, Is.EqualTo(testCase.Sequences.Length));
+            for (var i = 0; i < oligos.Count; i++)
+            {
+                var oligo = oligos[i];
+                var expectedSequence = testCase.Sequences[i];
+                Assert.That(oligo.BaseSequence, Is.EqualTo(expectedSequence));
+                Assert.That(oligo.FullSequence, Is.EqualTo(expectedSequence));
+            }
+        }
+
+        [Test]
+        [TestCaseSource(nameof(GetTestCases))]
+        public void TestNucleicAcid_Digestion_WithoutMods_MonoMasses(RnaDigestionTestCase testCase)
+        {
+            // digest the test-case sequence with empty fixed and variable modification lists
+            var nucleicAcid = new RNA(testCase.BaseSequence);
+            RnaDigestionParams parameters = new(testCase.Enzyme, testCase.MissedCleavages, testCase.MinLength,
+                testCase.MaxLength);
+            var oligos = nucleicAcid.Digest(parameters, new List<Modification>(), new List<Modification>()).ToList();
+
+            // each product's monoisotopic mass must match the expected mass to within 0.01
+            Assert.That(oligos.Count, Is.EqualTo(testCase.Sequences.Length));
+            for (var i = 0; i < oligos.Count; i++)
+            {
+                var actualMass = oligos[i].MonoisotopicMass;
+                Assert.That(actualMass, Is.EqualTo(testCase.MonoMasses[i]).Within(0.01));
+            }
+        }
+
+
+        #endregion
+
+        #region Digestion with Modifications
+
+        [Test]
+        public static void TestVariableModsCountCorrect()
+        {
+            // a single sodium entry targeting "A or C or G or U" is read back as four modifications
+            string modText = "ID   Sodium\r\nMT   Metal\r\nPP   Anywhere.\r\nTG   A or C or G or U\r\nCF   Na1H-1\r\n" + @"//";
+            var sodiumAdducts = PtmListLoader.ReadModsFromString(modText, out List<(Modification, string)> mods)
+                .ToList();
+            Assert.That(sodiumAdducts.Count, Is.EqualTo(4));
+
+            var rna = new RNA("GUACUG");
+            var rnaDigestionParams = new RnaDigestionParams { MaxMods = 1 };
+
+            // MaxMods = 1: the unmodified sequence plus one singly modified form per residue
+            var precursors = rna.Digest(rnaDigestionParams, new List<Modification>(), sodiumAdducts)
+                .ToList();
+            Assert.That(precursors.Count, Is.EqualTo(7));
+            var fullSequences = precursors.Select(p => p.FullSequence).ToList();
+            Assert.That(fullSequences, Has.Member("GUACUG"));
+            Assert.That(fullSequences, Has.Member("G[Metal:Sodium on G]UACUG"));
+            Assert.That(fullSequences, Has.Member("GU[Metal:Sodium on U]ACUG"));
+            Assert.That(fullSequences, Has.Member("GUA[Metal:Sodium on A]CUG"));
+            Assert.That(fullSequences, Has.Member("GUAC[Metal:Sodium on C]UG"));
+            Assert.That(fullSequences, Has.Member("GUACU[Metal:Sodium on U]G"));
+            Assert.That(fullSequences, Has.Member("GUACUG[Metal:Sodium on G]"));
+
+            // MaxMods = 2: additionally every pair of modified residues
+            rnaDigestionParams.MaxMods = 2;
+            precursors = rna.Digest(rnaDigestionParams, new List<Modification>(), sodiumAdducts)
+                .ToList();
+            Assert.That(precursors.Count, Is.EqualTo(22));
+            fullSequences = precursors.Select(p => p.FullSequence).ToList();
+            Assert.That(fullSequences, Has.Member("GUACUG"));
+            Assert.That(fullSequences, Has.Member("G[Metal:Sodium on G]UACUG"));
+            Assert.That(fullSequences, Has.Member("G[Metal:Sodium on G]U[Metal:Sodium on U]ACUG"));
+            Assert.That(fullSequences, Has.Member("G[Metal:Sodium on G]UA[Metal:Sodium on A]CUG"));
+            Assert.That(fullSequences, Has.Member("G[Metal:Sodium on G]UAC[Metal:Sodium on C]UG"));
+            Assert.That(fullSequences, Has.Member("G[Metal:Sodium on G]UACU[Metal:Sodium on U]G"));
+            Assert.That(fullSequences, Has.Member("G[Metal:Sodium on G]UACUG[Metal:Sodium on G]"));
+            Assert.That(fullSequences, Has.Member("GU[Metal:Sodium on U]ACUG"));
+            Assert.That(fullSequences, Has.Member("GU[Metal:Sodium on U]A[Metal:Sodium on A]CUG"));
+            Assert.That(fullSequences, Has.Member("GU[Metal:Sodium on U]AC[Metal:Sodium on C]UG"));
+            Assert.That(fullSequences, Has.Member("GU[Metal:Sodium on U]ACU[Metal:Sodium on U]G"));
+            Assert.That(fullSequences, Has.Member("GU[Metal:Sodium on U]ACUG[Metal:Sodium on G]"));
+            Assert.That(fullSequences, Has.Member("GUA[Metal:Sodium on A]CUG"));
+            Assert.That(fullSequences, Has.Member("GUA[Metal:Sodium on A]C[Metal:Sodium on C]UG"));
+            Assert.That(fullSequences, Has.Member("GUA[Metal:Sodium on A]CU[Metal:Sodium on U]G"));
+            Assert.That(fullSequences, Has.Member("GUA[Metal:Sodium on A]CUG[Metal:Sodium on G]"));
+            Assert.That(fullSequences, Has.Member("GUAC[Metal:Sodium on C]UG"));
+            Assert.That(fullSequences, Has.Member("GUAC[Metal:Sodium on C]U[Metal:Sodium on U]G"));
+            Assert.That(fullSequences, Has.Member("GUAC[Metal:Sodium on C]UG[Metal:Sodium on G]"));
+            Assert.That(fullSequences, Has.Member("GUACU[Metal:Sodium on U]G"));
+            Assert.That(fullSequences, Has.Member("GUACU[Metal:Sodium on U]G[Metal:Sodium on G]"));
+            Assert.That(fullSequences, Has.Member("GUACUG[Metal:Sodium on G]"));
+        }
+
+        [Test]
+        public static void TestFixedModsCountCorrect()
+        {
+            // fixed sodium on A: GUACUG contains one A, so exactly one fixed modification is expected
+            string modText = "ID   Sodium\r\nMT   Metal\r\nPP   Anywhere.\r\nTG   A\r\nCF   Na1H-1\r\n" + @"//";
+            var sodiumAdduct = PtmListLoader.ReadModsFromString(modText, out List<(Modification, string)> mods)
+                .ToList();
+
+            var rna = new RNA("GUACUG");
+            var rnaDigestionParams = new RnaDigestionParams { MaxMods = 1 };
+            var precursors = rna.Digest(rnaDigestionParams, sodiumAdduct, new List<Modification>())
+                .ToList();
+            Assert.That(precursors.Count, Is.EqualTo(1));
+            var onlyPrecursor = precursors.First();
+            Assert.That(onlyPrecursor.NumFixedMods, Is.EqualTo(1));
+            Assert.That(onlyPrecursor.FullSequence, Is.EqualTo("GUA[Metal:Sodium on A]CUG"));
+            Assert.That(onlyPrecursor.MonoisotopicMass, Is.EqualTo(1896.26).Within(0.01));
+
+            // fixed sodium on G: both G residues of GUACUG are expected to carry the modification
+            modText = "ID   Sodium\r\nMT   Metal\r\nPP   Anywhere.\r\nTG   G\r\nCF   Na1H-1\r\n" + @"//";
+            sodiumAdduct = PtmListLoader.ReadModsFromString(modText, out mods)
+                .ToList();
+            precursors = rna.Digest(rnaDigestionParams, sodiumAdduct, new List<Modification>())
+                .ToList();
+            Assert.That(precursors.Count, Is.EqualTo(1));
+            onlyPrecursor = precursors.First();
+            Assert.That(onlyPrecursor.NumFixedMods, Is.EqualTo(2));
+            Assert.That(onlyPrecursor.FullSequence, Is.EqualTo("G[Metal:Sodium on G]UACUG[Metal:Sodium on G]"));
+            Assert.That(onlyPrecursor.MonoisotopicMass, Is.EqualTo(1918.25).Within(0.01));
+        }
+
+        /// <summary>Digests GUACUG with a fixed potassium adduct and variable sodium adducts and checks the resulting forms.</summary>
+        [Test]
+        public static void TestFixedAndVariableMods()
+        {
+            // Each definition targets A, C, G and U; the count assertions below expect one Modification per target.
+            string sodiumText = "ID   Sodium\r\nMT   Metal\r\nPP   Anywhere.\r\nTG   A or C or G or U\r\nCF   Na1H-1\r\n" + @"//";
+            string potassiumText = "ID   Potassium\r\nMT   Metal\r\nPP   Anywhere.\r\nTG   A or C or G or U\r\nCF   K1H-1\r\n" + @"//";
+            var sodiumAdducts = PtmListLoader.ReadModsFromString(sodiumText, out _).ToList();
+            var potassiumAdducts = PtmListLoader.ReadModsFromString(potassiumText, out _).ToList();
+
+            Assert.That(sodiumAdducts.Count, Is.EqualTo(4));
+            Assert.That(potassiumAdducts.Count, Is.EqualTo(4));
+
+            var rna = new RNA("GUACUG");
+            var digestionParams = new RnaDigestionParams { MaxMods = 1 };
+
+            // Fixed potassium on A with variable sodium on C: the variable mod is either absent or present.
+            var fixedMods = new List<Modification> { potassiumAdducts[0] }; // A
+            var variableMods = new List<Modification> { sodiumAdducts[1] }; // C
+            var oligos = rna.Digest(digestionParams, fixedMods, variableMods).ToList();
+            var sequences = oligos.Select(o => o.FullSequence).ToList();
+
+            Assert.That(oligos.Count, Is.EqualTo(2));
+            Assert.That(oligos.TrueForAll(o => o.NumFixedMods == 1));
+            Assert.That(sequences.Contains("GUA[Metal:Potassium on A]CUG"));
+            Assert.That(sequences.Contains("GUA[Metal:Potassium on A]C[Metal:Sodium on C]UG"));
+
+            // The doubly modified form reports one fixed and one variable modification.
+            var oneOfEach = oligos.First(o => o.FullSequence.Equals("GUA[Metal:Potassium on A]C[Metal:Sodium on C]UG"));
+            Assert.That(oneOfEach.NumFixedMods, Is.EqualTo(1));
+            Assert.That(oneOfEach.NumVariableMods, Is.EqualTo(1));
+            Assert.That(oneOfEach.NumMods, Is.EqualTo(2));
+
+            // Fixed potassium on G (both ends of the sequence) with variable sodium on C.
+            fixedMods = new List<Modification> { potassiumAdducts[2] }; // G
+            variableMods = new List<Modification> { sodiumAdducts[1] }; // C
+            oligos = rna.Digest(digestionParams, fixedMods, variableMods).ToList();
+            sequences = oligos.Select(o => o.FullSequence).ToList();
+            Assert.That(oligos.Count, Is.EqualTo(2));
+            Assert.That(oligos.TrueForAll(o => o.NumFixedMods == 2));
+            Assert.That(sequences.Contains("G[Metal:Potassium on G]UACUG[Metal:Potassium on G]"));
+            Assert.That(sequences.Contains("G[Metal:Potassium on G]UAC[Metal:Sodium on C]UG[Metal:Potassium on G]"));
+
+            // Same fixed mod with variable sodium on C and U; with MaxMods = 1 only single-sodium forms are expected.
+            fixedMods = new List<Modification> { potassiumAdducts[2] }; // G
+            variableMods = new List<Modification> { sodiumAdducts[1], sodiumAdducts[3] }; // C, U
+            oligos = rna.Digest(digestionParams, fixedMods, variableMods).ToList();
+            sequences = oligos.Select(o => o.FullSequence).ToList();
+            Assert.That(oligos.Count, Is.EqualTo(4));
+            Assert.That(oligos.TrueForAll(o => o.NumFixedMods == 2));
+            Assert.That(sequences.Contains("G[Metal:Potassium on G]UACUG[Metal:Potassium on G]"));
+            Assert.That(sequences.Contains("G[Metal:Potassium on G]UAC[Metal:Sodium on C]UG[Metal:Potassium on G]"));
+            Assert.That(sequences.Contains("G[Metal:Potassium on G]UACU[Metal:Sodium on U]G[Metal:Potassium on G]"));
+            Assert.That(sequences.Contains("G[Metal:Potassium on G]U[Metal:Sodium on U]ACUG[Metal:Potassium on G]"));
+
+            // With MaxMods = 2 the forms carrying two sodium adducts are expected as well.
+            digestionParams.MaxMods = 2;
+            oligos = rna.Digest(digestionParams, fixedMods, variableMods).ToList();
+            sequences = oligos.Select(o => o.FullSequence).ToList();
+            Assert.That(oligos.Count, Is.EqualTo(7));
+            Assert.That(oligos.TrueForAll(o => o.NumFixedMods == 2));
+            Assert.That(sequences.Contains("G[Metal:Potassium on G]UACUG[Metal:Potassium on G]"));
+            Assert.That(sequences.Contains("G[Metal:Potassium on G]UAC[Metal:Sodium on C]UG[Metal:Potassium on G]"));
+            Assert.That(sequences.Contains("G[Metal:Potassium on G]U[Metal:Sodium on U]ACUG[Metal:Potassium on G]"));
+            Assert.That(sequences.Contains("G[Metal:Potassium on G]UACU[Metal:Sodium on U]G[Metal:Potassium on G]"));
+            Assert.That(sequences.Contains("G[Metal:Potassium on G]U[Metal:Sodium on U]ACU[Metal:Sodium on U]G[Metal:Potassium on G]"));
+            Assert.That(sequences.Contains("G[Metal:Potassium on G]UAC[Metal:Sodium on C]U[Metal:Sodium on U]G[Metal:Potassium on G]"));
+            Assert.That(sequences.Contains("G[Metal:Potassium on G]U[Metal:Sodium on U]AC[Metal:Sodium on C]UG[Metal:Potassium on G]"));
+        }
+
+        #endregion
+    }
+}
diff --git a/mzLib/Test/Transcriptomics/TestFragmentation.cs b/mzLib/Test/Transcriptomics/TestFragmentation.cs
new file mode 100644
index 000000000..6086ecb70
--- /dev/null
+++ b/mzLib/Test/Transcriptomics/TestFragmentation.cs
@@ -0,0 +1,239 @@
+﻿using NUnit.Framework;
+using System;
+using System.Collections.Generic;
+using System.Diagnostics.CodeAnalysis;
+using System.Linq;
+using System.Runtime.CompilerServices;
+using System.Text;
+using System.Threading.Tasks;
+using Transcriptomics;
+using MassSpectrometry;
+using Omics.Fragmentation;
+using Omics.Fragmentation.Oligo;
+using Omics.Modifications;
+using Transcriptomics.Digestion;
+using UsefulProteomicsDatabases;
+
+namespace Test.Transcriptomics
+{
+    [TestFixture]
+    [ExcludeFromCodeCoverage]
+    internal class TestFragmentation
+    {
+
+        internal static IEnumerable<TestNucleicAcid.SixmerTestCase> GetSixMerIndividualFragmentTypeTestCases()
+            => TestNucleicAcid.GetSixmerIndividualFragmentTypeTestCases(); // forwards the shared cases so TestCaseSource(nameof(...)) can name a member of this fixture
+
+        /// <summary>Every neutral fragment mass of GUACUG must match the reference value for the requested product type.</summary>
+        [Test]
+        [TestCaseSource(nameof(GetSixMerIndividualFragmentTypeTestCases))]
+        public void TestGetNeutralFragments(TestNucleicAcid.SixmerTestCase testCase)
+        {
+            var rna = new RNA("GUACUG")
+                .Digest(new RnaDigestionParams(), new List<Modification>(), new List<Modification>())
+                .First() as OligoWithSetMods ?? throw new NullReferenceException();
+            var neutralFragments = rna.GetNeutralFragments(testCase.Type).ToList();
+            for (int i = 0; i < neutralFragments.Count; i++) // start at 0 so the first fragment is verified as well
+            {
+                Assert.That(neutralFragments[i].NeutralMass, Is.EqualTo(testCase.NeutralMasses[i]).Within(0.01));
+            }
+        }
+
+
+        private static IEnumerable<DissociationType> ImplementedDissociationTypes
+        {
+            get
+            {
+                Loaders.LoadElements(); // test-case source: the element table is loaded before any type is handed out
+                foreach (var dissociationType in DissociationTypeCollection.AllImplementedDissociationTypes)
+                    yield return dissociationType;
+            }
+        }
+
+        /// <summary>
+        /// Fragments unmodified, top-down digested RNAs and checks the number and kind of products per terminus.
+        /// The expected counts assume that the M ion is a component of all product types.
+        /// </summary>
+        /// <param name="type">Dissociation type whose RNA product types are exercised.</param>
+        [Test]
+        [TestCaseSource(nameof(ImplementedDissociationTypes))]
+        public void TestFragmentation_Unmodified_ProductCountsAreCorrect(DissociationType type)
+        {
+            Loaders.LoadElements();
+            var fragments = new List<Product>();
+            RNA[] rnas = { new RNA("GUACUG"), new RNA("GUACUGCACUGU"), new RNA("GUACUGUAAUGAGACUAGUACAUGACAUG") };
+            FragmentationTerminus[] termini = { FragmentationTerminus.Both, FragmentationTerminus.FivePrime, FragmentationTerminus.ThreePrime };
+            var allProductTypes = type.GetRnaProductTypesFromDissociationType();
+
+            // test with top down digestion and no modifications
+            var topDown = new RnaDigestionParams(rnase: "top-down");
+            var noFixedMods = new List<Modification>();
+            var noVariableMods = new List<Modification>();
+            foreach (var terminus in termini)
+            {
+                bool bothTermini = terminus == FragmentationTerminus.Both;
+                foreach (var rna in rnas)
+                {
+                    var oligo = rna.Digest(topDown, noFixedMods, noVariableMods).First();
+                    var allowedTypes = bothTermini
+                        ? allProductTypes
+                        : allProductTypes.Where(p => p.GetRnaTerminusType() == terminus).ToList();
+
+                    // there is only one M ion, so for both termini remove it from the multiplier and add one
+                    var cleavageSites = oligo.Length - 1;
+                    var expectedCount = bothTermini
+                        ? cleavageSites * (allowedTypes.Count - 1) + 1
+                        : cleavageSites * allowedTypes.Count;
+
+                    oligo.Fragment(type, terminus, fragments);
+                    Assert.That(fragments.Count, Is.EqualTo(expectedCount));
+                    Assert.That(fragments.All(p => allowedTypes.Contains(p.ProductType)));
+                }
+            }
+        }
+
+        /// <summary>Compares neutral fragment masses of an oligo digested with and without the fixed modification parsed from modString.</summary>
+        [Test]
+        [TestCase("GUACUG", "ID   Sodium\r\nMT   Metal\r\nPP   Anywhere.\r\nTG   A\r\nCF   Na1H-1\r\n" + @"//",
+            "GUA[Metal:Sodium on A]CUG", 1874.28, 1896.26, ProductType.a,
+            new[] { 267.089, 573.114, 902.167, 1207.208, 1513.233 },
+            new[] { 267.089, 573.114, 902.167 + 21.982, 1207.208 + 21.982, 1513.233 + 21.982 })]
+        [TestCase("GUACUG", "ID   Sodium\r\nMT   Metal\r\nPP   Anywhere.\r\nTG   A\r\nCF   Na1H-1\r\n" + @"//",
+            "GUA[Metal:Sodium on A]CUG", 1874.28, 1896.26, ProductType.b,
+            new[] { 283.084, 589.109, 918.162, 1223.203, 1529.228 },
+            new[] { 283.084, 589.109, 918.162 + 21.982, 1223.203 + 21.982, 1529.228 + 21.982 })]
+        [TestCase("GUACUG", "ID   Sodium\r\nMT   Metal\r\nPP   Anywhere.\r\nTG   A\r\nCF   Na1H-1\r\n" + @"//",
+            "GUA[Metal:Sodium on A]CUG", 1874.28, 1896.26, ProductType.c,
+            new[] { 347.055, 653.081, 982.133, 1287.174, 1593.2 },
+            new[] { 347.055, 653.081, 982.133 + 21.982, 1287.174 + 21.982, 1593.2 + 21.982 })]
+        [TestCase("GUACUG", "ID   Sodium\r\nMT   Metal\r\nPP   Anywhere.\r\nTG   A\r\nCF   Na1H-1\r\n" + @"//",
+            "GUA[Metal:Sodium on A]CUG", 1874.28, 1896.26, ProductType.d,
+            new[] { 363.05, 669.075, 998.128, 1303.169, 1609.195 },
+            new[] { 363.05, 669.075, 998.128 + 21.982, 1303.169 + 21.982, 1609.195 + 21.982 })]
+        [TestCase("GUACUG", "ID   Sodium\r\nMT   Metal\r\nPP   Anywhere.\r\nTG   A\r\nCF   Na1H-1\r\n" + @"//",
+            "GUA[Metal:Sodium on A]CUG", 1874.28, 1896.26, ProductType.dWaterLoss,
+            new[] { 345.039, 651.064, 980.116, 1285.157, 1591.184 },
+            new[] { 345.039, 651.064, 980.116 + 21.982, 1285.157 + 21.982, 1591.184 + 21.982 })]
+        [TestCase("GUACUG", "ID   Sodium\r\nMT   Metal\r\nPP   Anywhere.\r\nTG   A\r\nCF   Na1H-1\r\n" + @"//",
+            "GUA[Metal:Sodium on A]CUG", 1874.28, 1896.26, ProductType.w,
+            new[] { 363.049, 669.074, 974.115, 1303.169, 1609.195 },
+            new[] { 363.049, 669.074, 974.115, 1303.169 + 21.982, 1609.195 + 21.982 })]
+        [TestCase("GUACUG", "ID   Sodium\r\nMT   Metal\r\nPP   Anywhere.\r\nTG   A\r\nCF   Na1H-1\r\n" + @"//",
+            "GUA[Metal:Sodium on A]CUG", 1874.28, 1896.26, ProductType.x,
+            new[] { 347.055, 653.081, 958.122, 1287.174, 1593.2 },
+            new[] { 347.055, 653.081, 958.122, 1287.174 + 21.982, 1593.2 + 21.982 })]
+        [TestCase("GUACUG", "ID   Sodium\r\nMT   Metal\r\nPP   Anywhere.\r\nTG   A\r\nCF   Na1H-1\r\n" + @"//",
+            "GUA[Metal:Sodium on A]CUG", 1874.28, 1896.26, ProductType.y,
+            new[] { 283.084, 589.109, 894.15, 1223.203, 1529.228 },
+            new[] { 283.084, 589.109, 894.15, 1223.203 + 21.982, 1529.228 + 21.982 })]
+        [TestCase("GUACUG", "ID   Sodium\r\nMT   Metal\r\nPP   Anywhere.\r\nTG   A\r\nCF   Na1H-1\r\n" + @"//",
+            "GUA[Metal:Sodium on A]CUG", 1874.28, 1896.26, ProductType.z,
+            new[] { 267.089, 573.124, 878.156, 1207.208, 1513.233 },
+            new[] { 267.089, 573.124, 878.156, 1207.208 + 21.982, 1513.233 + 21.982 })]
+        public void TestFragmentation_Modified(string sequence, string modString, string fullSequence, double unmodifiedMass, double modifiedMass,
+            ProductType productType, double[] unmodifiedFragmentMass, double[] modifiedFragmentMasses)
+        {
+            var fixedMods = PtmListLoader.ReadModsFromString(modString, out _).ToList();
+            var rna = new RNA(sequence);
+
+            // Digest without modifications: no mods are recorded and the full sequence equals the bare sequence.
+            var bareOligo = rna.Digest(new RnaDigestionParams(), new List<Modification>(), new List<Modification>())
+                .First() as OligoWithSetMods ?? throw new NullReferenceException();
+            Assert.That(bareOligo.AllModsOneIsNterminus.Count, Is.EqualTo(0));
+            Assert.That(bareOligo.FullSequence, Is.EqualTo(sequence));
+            Assert.That(bareOligo.MonoisotopicMass, Is.EqualTo(unmodifiedMass).Within(0.01));
+
+            // Digest again with the parsed modification(s) supplied as fixed mods.
+            var adductedOligo = rna.Digest(new RnaDigestionParams(), fixedMods, new List<Modification>())
+                .First() as OligoWithSetMods ?? throw new NullReferenceException();
+            Assert.That(adductedOligo.AllModsOneIsNterminus.Count, Is.EqualTo(fixedMods.Count));
+            Assert.That(adductedOligo.FullSequence, Is.EqualTo(fullSequence));
+            Assert.That(adductedOligo.MonoisotopicMass, Is.EqualTo(modifiedMass).Within(0.01));
+
+            var bareFragments = bareOligo.GetNeutralFragments(productType).ToList();
+            Assert.That(bareFragments.Count, Is.EqualTo(5));
+            var adductedFragments = adductedOligo.GetNeutralFragments(productType).ToList();
+            Assert.That(adductedFragments.Count, Is.EqualTo(5));
+
+            // Compare both fragment ladders index by index against their expected masses.
+            for (int index = 0; index < bareFragments.Count; index++)
+            {
+                Assert.That(bareFragments[index].NeutralMass, Is.EqualTo(unmodifiedFragmentMass[index]).Within(0.01));
+                Assert.That(adductedFragments[index].NeutralMass, Is.EqualTo(modifiedFragmentMasses[index]).Within(0.01));
+            }
+        }
+
+
+        /// <summary>Checks type, terminus, masses, unset secondary fields, annotation and ToString() of each neutral fragment of GUACUG.</summary>
+        [Test]
+        [TestCaseSource(nameof(GetSixMerIndividualFragmentTypeTestCases))]
+        public void TestRnaFragments(TestNucleicAcid.SixmerTestCase testCase)
+        {
+            var rna = new RNA("GUACUG")
+                .Digest(new RnaDigestionParams(), new List<Modification>(), new List<Modification>())
+                .First() as OligoWithSetMods ?? throw new NullReferenceException();
+            List<Product> products = rna.GetNeutralFragments(testCase.Type).Select(p => (Product)p).ToList();
+            // Actual value first, expected value in the constraint, so NUnit failure messages label them correctly.
+            for (int i = 0; i < products.Count; i++)
+            {
+                var product = products[i];
+                Assert.That(product.ProductType, Is.EqualTo(testCase.Type));
+                Assert.That(product.Terminus, Is.EqualTo(testCase.Type.GetRnaTerminusType()));
+                Assert.That(product.NeutralMass, Is.EqualTo(testCase.NeutralMasses[i]).Within(0.01));
+                Assert.That(product.MonoisotopicMass, Is.EqualTo(testCase.NeutralMasses[i]).Within(0.01));
+                Assert.That(product.NeutralLoss, Is.EqualTo(0));
+                Assert.That(product.SecondaryProductType, Is.Null);
+                Assert.That(product.SecondaryFragmentNumber, Is.EqualTo(0));
+                string annotation = $"{product.ProductType}{product.FragmentNumber}";
+                Assert.That(product.Annotation, Is.EqualTo(annotation));
+                string toString =
+                    $"{product.ProductType}{product.FragmentNumber};{product.NeutralMass:F5}-{product.NeutralLoss:0.##}";
+                Assert.That(product.ToString(), Is.EqualTo(toString));
+            }
+        }
+
+        /// <summary>Fragment numbers run 1..n in returned order; 3' fragments report Length minus fragment number as residue position.</summary>
+        [Test]
+        [TestCaseSource(nameof(GetSixMerIndividualFragmentTypeTestCases))]
+        public void TestRnaFragmentNumbers(TestNucleicAcid.SixmerTestCase testCase)
+        {
+            var rna = new RNA("GUACUG")
+                .Digest(new RnaDigestionParams(), new List<Modification>(), new List<Modification>())
+                .First() as OligoWithSetMods ?? throw new NullReferenceException();
+            List<Product> fragments = rna.GetNeutralFragments(testCase.Type).Select(f => (Product)f).ToList();
+
+            // Walk the fragments in the order returned by GetNeutralFragments.
+            int expectedNumber = 0;
+            foreach (var fragment in fragments)
+            {
+                expectedNumber++; // fragment numbers are 1-based
+                bool fromThreePrime = fragment.ProductType.GetRnaTerminusType() == FragmentationTerminus.ThreePrime;
+                int expectedPosition = fromThreePrime ? rna.Length - expectedNumber : expectedNumber;
+
+                Assert.That(fragment.FragmentNumber, Is.EqualTo(expectedNumber));
+                Assert.That(fragment.ResiduePosition, Is.EqualTo(expectedPosition));
+            }
+        }
+
+        [Test]
+        public void TestConstructorAndEquality()
+        {
+            var reference = new Product(ProductType.d, FragmentationTerminus.FivePrime, 200, 4, 4, 0.0);
+            var identical = new Product(ProductType.d, FragmentationTerminus.FivePrime, 200, 4, 4, 0.0);
+            var different = new Product(ProductType.a, FragmentationTerminus.FivePrime, 201, 4, 4, 0.0);
+
+            Assert.That(reference.Equals(reference), Is.True);
+            Assert.That(reference.Equals(identical), Is.True);
+            Assert.That(reference.GetHashCode(), Is.EqualTo(identical.GetHashCode()));
+            Assert.That(reference.Equals(different), Is.False);
+            Assert.That(reference.Equals(null), Is.False);
+            Assert.That(reference.GetHashCode(), Is.Not.EqualTo(different.GetHashCode()));
+            // same checks through an explicit object cast, plus a product that differs only in its terminus
+            Assert.That(reference.Equals((object)reference), Is.True);
+            Assert.That(reference.Equals((object)identical), Is.True);
+            Assert.That(reference.Equals((object)different), Is.False);
+            Assert.That(reference.Equals((object)new Product(ProductType.d, FragmentationTerminus.N, 200, 4, 4, 0.0)), Is.False);
+            Assert.That(reference.Equals((object)null), Is.False);
+        }
+    }
+}
diff --git a/mzLib/Test/Transcriptomics/TestNucleicAcid.cs b/mzLib/Test/Transcriptomics/TestNucleicAcid.cs
new file mode 100644
index 000000000..efbf05020
--- /dev/null
+++ b/mzLib/Test/Transcriptomics/TestNucleicAcid.cs
@@ -0,0 +1,171 @@
+﻿using NUnit.Framework.Legacy;
+using NUnit.Framework;
+using System;
+using System.Collections.Generic;
+using System.Diagnostics.CodeAnalysis;
+using System.Linq;
+using Chemistry;
+using Omics.Fragmentation;
+using Transcriptomics;
+using UsefulProteomicsDatabases;
+
+namespace Test.Transcriptomics
+{
+    /// <summary>
+    /// Test data generated with http://rna.rega.kuleuven.be/masspec/mongo.htm
+    /// </summary>
+    [TestFixture]
+    [ExcludeFromCodeCoverage]
+    internal class TestNucleicAcid
+    {
+        internal record SixmerTestCase(string Sequence, ProductType Type, double[] NeutralMasses, string[] ChemicalFormulas); // one reference case: expected neutral mass and formula per fragment index for the given sequence and product type
+
+        internal static IEnumerable<SixmerTestCase> GetSixmerIndividualFragmentTypeTestCases()
+        {
+            Loaders.LoadElements(); // iterator method: this runs when enumeration of the cases begins
+            // Reference neutral masses and chemical formulas for the five fragments of GUACUG, one case per product type.
+            yield return new SixmerTestCase("GUACUG", ProductType.a,
+                new[] { 267.089, 573.114, 902.167, 1207.208, 1513.233 },
+                new[] { "C10H13N5O4", "C19H24N7O12P", "C29H36N12O18P2", "C38H48N15O25P3", "C47H59N17O33P4" });
+            yield return new SixmerTestCase("GUACUG", ProductType.b,
+            new[] { 283.084, 589.109, 918.162, 1223.203, 1529.228 },
+            new[] { "C10H13N5O5", "C19H24N7O13P", "C29H36N12O19P2", "C38H48N15O26P3", "C47H59N17O34P4" });
+            yield return new SixmerTestCase("GUACUG", ProductType.c,
+                new[] { 347.055, 653.081, 982.133, 1287.174, 1593.2 },
+            new[] { "C10H14N5O7P", "C19H25N7O15P2", "C29H37N12O21P3", "C38H49N15O28P4", "C47H60N17O36P5", });
+            yield return new SixmerTestCase("GUACUG", ProductType.d,
+                new[] { 363.05, 669.075, 998.128, 1303.169, 1609.195 },
+            new[] { "C10H14N5O8P", "C19H25N7O16P2", "C29H37N12O22P3", "C38H49N15O29P4", "C47H60N17O37P5", });
+            yield return new SixmerTestCase("GUACUG", ProductType.dWaterLoss,
+                new[] { 345.039, 651.064, 980.116, 1285.157, 1591.184 },
+            new[] { "C10H12N5O7P", "C19H23N7O15P2", "C29H35N12O21P3", "C38H47N15O28P4", "C47H58N17O36P5", });
+            yield return new SixmerTestCase("GUACUG", ProductType.w,
+                new[] { 363.049, 669.074, 974.115, 1303.169, 1609.195 },
+            new[] { "C10H14N5O8P", "C19H25N7O16P2", "C28H37N10O23P3", "C38H49N15O29P4", "C47H60N17O37P5", });
+            yield return new SixmerTestCase("GUACUG", ProductType.x,
+                new[] { 347.055, 653.081, 958.122, 1287.174, 1593.2 },
+            new[] { "C10H14N5O7P", "C19H25N7O15P2", "C28H37N10O22P3", "C38H49N15O28P4", "C47H60N17O36P5" });
+            yield return new SixmerTestCase("GUACUG", ProductType.y,
+                new[] { 283.084, 589.109, 894.15, 1223.203, 1529.228 },
+            new[] { "C10H13N5O5", "C19H24N7O13P", "C28H36N10O20P2", "C38H48N15O26P3", "C47H59N17O34P4", });
+            yield return new SixmerTestCase("GUACUG", ProductType.z,
+                new[] { 267.089, 573.124, 878.156, 1207.208, 1513.233 },
+            new[] { "C10H13N5O4", "C19H24N7O12P", "C28H36N10O19P2", "C38H48N15O25P3", "C47H59N17O33P4", });
+
+            // Base-loss variants of the a, b, c and d series.
+            yield return new SixmerTestCase("GUACUG", ProductType.aBaseLoss,
+                new[] { 114.03, 459.07, 765.095, 1094.147, 1399.198 },
+                new[] { "C5H6O3", "C15H18N5O10P", "C24H29N7O18P2", "C34H41N12O24P3", "C43H53N15O31P4" });
+            yield return new SixmerTestCase("GUACUG", ProductType.bBaseLoss,
+                new[] { 130.027, 475.074, 781.099, 1110.152, 1415.193 },
+                new[] { "C5H6O4", "C15H18N5O11P", "C24H29N7O19P2", "C34H41N12O25P3", "C43H53N15O32P4" });
+            yield return new SixmerTestCase("GUACUG", ProductType.cBaseLoss,
+                new[] { 193.998, 539.045, 845.071, 1174.123, 1479.164 },
+                new[] { "C5H7O6P", "C15H19N5O13P2", "C24H30N7O21P3", "C34H42N12O27P4", "C43H54N15O34P5" });
+            yield return new SixmerTestCase("GUACUG", ProductType.dBaseLoss,
+                new[] { 209.993, 555.04, 861.066, 1190.118, 1495.16 },
+                new[] { "C5H7O7P", "C15H19N5O14P2", "C24H30N7O22P3", "C34H42N12O28P4", "C43H54N15O35P5" });
+
+            // TODO: Add water loss besides d-H2O
+        }
+
+
+        /// <summary>Both RNA constructors must give the expected length, mass, termini and residues, and equal instances.</summary>
+        [Test]
+        [TestCase("GUACUG", 1874.281)]
+        [TestCase("A", 267.096)]
+        [TestCase("C", 243.085)]
+        [TestCase("U", 244.069)]
+        [TestCase("G", 283.091)]
+        [TestCase("GU", 589.116)]
+        [TestCase("AAA", 925.200)]
+        [TestCase("CCC", 853.166)]
+        [TestCase("UUU", 856.119)]
+        [TestCase("GGG", 973.185)]
+        public void TestConstructorsAndEquality(string sequence, double monoMass)
+        {
+            // sequence-only constructor
+            RNA rna = new RNA(sequence);
+            Assert.That(rna.Length, Is.EqualTo(sequence.Length));
+            Assert.That(rna.MonoisotopicMass, Is.EqualTo(monoMass).Within(0.01));
+            Assert.That(rna.GetChemicalFormula().MonoisotopicMass, Is.EqualTo(monoMass).Within(0.01));
+            Assert.That(rna.NucleicAcidArray.Length, Is.EqualTo(sequence.Length));
+            CollectionAssert.AreEqual(rna.NucleicAcidArray.Select(p => p.Letter), sequence);
+            Assert.That(rna.FivePrimeTerminus.Equals(NucleicAcid.DefaultFivePrimeTerminus));
+            Assert.That(rna.ThreePrimeTerminus.Equals(NucleicAcid.DefaultThreePrimeTerminus));
+            List<Nucleotide> nucList = new();
+            foreach (var nucleotide in sequence)
+            {
+                nucList.Add(Nucleotide.GetResidue(nucleotide));
+            }
+            Assert.That(rna.NucleicAcidArray.SequenceEqual(nucList.ToArray()));
+
+            // constructor with explicit termini; these assertions must inspect rna2, not rna
+            var rna2 = new RNA(sequence, NucleicAcid.DefaultFivePrimeTerminus, NucleicAcid.DefaultThreePrimeTerminus);
+            Assert.That(rna2.Length, Is.EqualTo(sequence.Length));
+            Assert.That(rna2.MonoisotopicMass, Is.EqualTo(monoMass).Within(0.01));
+            Assert.That(rna2.FivePrimeTerminus.Equals(NucleicAcid.DefaultFivePrimeTerminus));
+            Assert.That(rna2.ThreePrimeTerminus.Equals(NucleicAcid.DefaultThreePrimeTerminus));
+            nucList.Clear();
+            foreach (var nucleotide in sequence)
+            {
+                nucList.Add(Nucleotide.GetResidue(nucleotide));
+            }
+            Assert.That(rna2.NucleicAcidArray.SequenceEqual(nucList.ToArray()));
+
+            Assert.That(rna.Equals(rna2));
+            Assert.That(rna.Equals(rna));
+            Assert.That(!rna.Equals(null));
+            Assert.That(rna.Equals((object)rna2));
+            Assert.That(rna.Equals((object)rna));
+            Assert.That(!rna.Equals((object)null));
+            Assert.That(!rna.Equals((object)new Double()));
+        }
+
+        [Test]
+        public void TestParseSequence()
+        {
+            var rna1 = new RNA("GUACUG");
+            var rna2 = new RNA("GU ACU G");
+            var rna3 = new RNA("GU*ACU*G");
+            // Spaces (rna2) and '*' (rna3) are expected to be ignored, so each variant is compared with the plain sequence.
+            Assert.That(rna1.BaseSequence, Is.EqualTo(rna2.BaseSequence));
+            Assert.That(rna1.BaseSequence, Is.EqualTo(rna3.BaseSequence));
+            Assert.That(rna1.GetHashCode(), Is.EqualTo(rna2.GetHashCode()));
+            Assert.That(rna1.GetHashCode(), Is.EqualTo(rna3.GetHashCode()));
+            Assert.That(rna1.Length, Is.EqualTo(rna2.Length));
+            Assert.That(rna1.Length, Is.EqualTo(rna3.Length));
+            // An unsupported character such as '~' must be rejected with an ArgumentException.
+            Assert.Throws<ArgumentException>(() => new RNA("GUA~CUG"));
+        }
+
+        [Test]
+        [TestCase("GUACUG", new[] { -1, -2, -3, -4, -5 }, new[] { 1873.273, 936.133, 623.752, 467.562, 373.848 })]
+        public void TestElectroSpraySeries(string sequence, int[] charges, double[] mzs)
+        {
+            RNA rna = new(sequence);
+            var series = rna.GetElectrospraySeries(charges.First(), charges.Last()).ToList();
+            // Guard against a vacuous pass: one m/z value is expected for every listed charge state.
+            Assert.That(series.Count, Is.EqualTo(mzs.Length));
+            for (int i = 0; i < mzs.Length; i++)
+            {
+                Assert.That(series[i], Is.EqualTo(mzs[i]).Within(0.001));
+            }
+        }
+
+        [Test]
+        [TestCase("GUACUG", new[] { -1, -2, -3, -4, -5, -6 }, new[] { 1953.239, 976.116, 650.408, 487.554, 389.841, 324.700 })]
+        public void TestReplaceTerminusWithElectroSpraySeries(string sequence, int[] charges, double[] mzs)
+        {
+            RNA rna = new(sequence); // use the test-case sequence instead of a hard-coded literal
+            rna.FivePrimeTerminus = new ChemicalFormula(); // swap the 5' terminus for a default-constructed formula before computing the series
+            var series = rna.GetElectrospraySeries(charges.First(), charges.Last()).ToList();
+            // Guard against a vacuous pass: one m/z value is expected for every listed charge state.
+            Assert.That(series.Count, Is.EqualTo(mzs.Length));
+            for (int i = 0; i < mzs.Length; i++)
+            {
+                Assert.That(series[i], Is.EqualTo(mzs[i]).Within(0.001));
+            }
+        }
+    }
+}
diff --git a/mzLib/Test/Transcriptomics/TestProductType.cs b/mzLib/Test/Transcriptomics/TestProductType.cs
new file mode 100644
index 000000000..be9dc2f93
--- /dev/null
+++ b/mzLib/Test/Transcriptomics/TestProductType.cs
@@ -0,0 +1,280 @@
+﻿using System;
+using System.Collections.Generic;
+using System.Diagnostics.CodeAnalysis;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+using Chemistry;
+using MassSpectrometry;
+using NUnit.Framework;
+using NUnit.Framework.Legacy;
+using Omics.Fragmentation;
+using Omics.Fragmentation.Oligo;
+using Omics.Modifications;
+using Transcriptomics;
+using Transcriptomics.Digestion;
+
+namespace Test.Transcriptomics
+{
+    [TestFixture]
+    [ExcludeFromCodeCoverage]
+    public class TestProductType
+    {
+        // The product types returned for a dissociation type must be exactly the listed set;
+        // the comparison is an equivalence check, so ordering does not matter.
+        [Test]
+        [TestCase(DissociationType.HCD, new[] { ProductType.a, ProductType.aBaseLoss, ProductType.b, ProductType.c, ProductType.d,
+            ProductType.dWaterLoss, ProductType.w, ProductType.x, ProductType.y, ProductType.z, ProductType.M })]
+        [TestCase(DissociationType.CID, new[] { ProductType.a, ProductType.aBaseLoss, ProductType.c, ProductType.dWaterLoss,
+            ProductType.w, ProductType.y, ProductType.yWaterLoss, ProductType.M })]
+        public void TestProductTypes_Dissociation(DissociationType dissociation, ProductType[] products)
+            => Assert.That(dissociation.GetRnaProductTypesFromDissociationType(), Is.EquivalentTo(products));
+
+        // The product types returned for a terminus must be exactly the listed set;
+        // the comparison is an equivalence check, so ordering does not matter.
+        [Test]
+        [TestCase(FragmentationTerminus.FivePrime, new[]
+        {
+            ProductType.a, ProductType.aWaterLoss, ProductType.aBaseLoss,
+            ProductType.b, ProductType.bWaterLoss, ProductType.bBaseLoss,
+            ProductType.c, ProductType.cWaterLoss, ProductType.cBaseLoss,
+            ProductType.d, ProductType.dWaterLoss, ProductType.dBaseLoss,
+        })]
+        [TestCase(FragmentationTerminus.ThreePrime, new[]
+        {
+            ProductType.w, ProductType.wWaterLoss, ProductType.wBaseLoss,
+            ProductType.x, ProductType.xWaterLoss, ProductType.xBaseLoss,
+            ProductType.y, ProductType.yWaterLoss, ProductType.yBaseLoss,
+            ProductType.z, ProductType.zWaterLoss, ProductType.zBaseLoss,
+        })]
+        public void TestProductTypes_Terminus(FragmentationTerminus terminus, ProductType[] products)
+            => Assert.That(terminus.GetRnaTerminusSpecificProductTypes(), Is.EquivalentTo(products));
+
+        // The product types returned for a dissociation type restricted to a terminus must be exactly
+        // the listed set; the comparison is an equivalence check, so ordering does not matter.
+        [Test]
+        [TestCase(DissociationType.HCD, FragmentationTerminus.FivePrime, new[]
+            { ProductType.a, ProductType.aBaseLoss, ProductType.b, ProductType.c, ProductType.d, ProductType.dWaterLoss, })]
+        [TestCase(DissociationType.HCD, FragmentationTerminus.ThreePrime, new[]
+            { ProductType.w, ProductType.x, ProductType.y, ProductType.z, })]
+        [TestCase(DissociationType.HCD, FragmentationTerminus.Both, new[]
+            { ProductType.a, ProductType.aBaseLoss, ProductType.b, ProductType.c, ProductType.d, ProductType.dWaterLoss, ProductType.w, ProductType.x, ProductType.y, ProductType.z, ProductType.M })]
+        [TestCase(DissociationType.CID, FragmentationTerminus.FivePrime, new[]
+            { ProductType.a, ProductType.aBaseLoss, ProductType.c, ProductType.dWaterLoss })]
+        [TestCase(DissociationType.CID, FragmentationTerminus.ThreePrime, new[]
+            { ProductType.w, ProductType.y, ProductType.yWaterLoss })]
+        [TestCase(DissociationType.CID, FragmentationTerminus.Both, new[]
+            { ProductType.a, ProductType.aBaseLoss, ProductType.c, ProductType.dWaterLoss, ProductType.w, ProductType.y, ProductType.yWaterLoss, ProductType.M })]
+        public void TestProductTypes_TerminusAndDissociation(DissociationType dissociation, FragmentationTerminus terminus, ProductType[] products)
+            => Assert.That(dissociation.GetRnaTerminusSpecificProductTypesFromDissociation(terminus), Is.EquivalentTo(products));
+
+        // Compares the neutral mass shift of each product type handled below with its expected formula mass (2 decimal places).
+        [Test]
+        public static void Test_NeutralMassShiftFromProductType()
+        {
+            foreach (ProductType p in Enum.GetValues<ProductType>())
+            {
+                double mass = 0; // computed shift (the "actual" value), rounded to two places
+                switch (p) // NOTE(review): no default branch, so types without a case (e.g. bWaterLoss) are skipped - confirm intended
+                {
+                    case ProductType.a:
+                        mass = p.GetRnaMassShiftFromProductType().RoundedDouble(2).Value;
+                        Assert.That(mass, Is.EqualTo(ChemicalFormula.ParseFormula("H").MonoisotopicMass.RoundedDouble(2).Value));
+                        break;
+
+                    case ProductType.b:
+                        mass = p.GetRnaMassShiftFromProductType().RoundedDouble(2).Value;
+                        Assert.That(mass, Is.EqualTo(ChemicalFormula.ParseFormula("OH").MonoisotopicMass.RoundedDouble(2).Value));
+                        break;
+
+                    case ProductType.c:
+                        mass = p.GetRnaMassShiftFromProductType().RoundedDouble(2).Value;
+                        Assert.That(mass, Is.EqualTo(ChemicalFormula.ParseFormula("O3H2P").MonoisotopicMass.RoundedDouble(2).Value));
+                        break;
+
+                    case ProductType.x:
+                        mass = p.GetRnaMassShiftFromProductType().RoundedDouble(2).Value;
+                        Assert.That(mass, Is.EqualTo(ChemicalFormula.ParseFormula("O-1H").MonoisotopicMass.RoundedDouble(2).Value));
+                        break;
+
+                    case ProductType.y:
+                        mass = p.GetRnaMassShiftFromProductType().RoundedDouble(2).Value;
+                        Assert.That(mass, Is.EqualTo(ChemicalFormula.ParseFormula("O-3P-1").MonoisotopicMass.RoundedDouble(2).Value));
+                        break;
+
+                    case ProductType.zWaterLoss:
+                        mass = p.GetRnaMassShiftFromProductType().RoundedDouble(2).Value;
+                        Assert.That(mass, Is.EqualTo(ChemicalFormula.ParseFormula("O-5H-2P-1").MonoisotopicMass.RoundedDouble(2).Value));
+                        break;
+
+                    case ProductType.aWaterLoss:
+                        mass = p.GetRnaMassShiftFromProductType().RoundedDouble(2).Value;
+                        Assert.That(mass, Is.EqualTo(ChemicalFormula.ParseFormula("H-1O-1").MonoisotopicMass.RoundedDouble(2).Value));
+                        break;
+
+                    case ProductType.aBaseLoss:
+                        mass = p.GetRnaMassShiftFromProductType().RoundedDouble(2).Value;
+                        Assert.That(mass, Is.EqualTo(ChemicalFormula.ParseFormula("H-2").MonoisotopicMass.RoundedDouble(2).Value));
+                        break;
+
+                    case ProductType.bBaseLoss:
+                        mass = p.GetRnaMassShiftFromProductType().RoundedDouble(2).Value;
+                        Assert.That(mass, Is.EqualTo(ChemicalFormula.ParseFormula("O1H-2").MonoisotopicMass.RoundedDouble(2).Value));
+                        break;
+
+                    case ProductType.cWaterLoss:
+                        mass = p.GetRnaMassShiftFromProductType().RoundedDouble(2).Value;
+                        Assert.That(mass, Is.EqualTo(ChemicalFormula.ParseFormula("O2P").MonoisotopicMass.RoundedDouble(2).Value));
+                        break;
+
+                    case ProductType.cBaseLoss:
+                        mass = p.GetRnaMassShiftFromProductType().RoundedDouble(2).Value;
+                        Assert.That(mass, Is.EqualTo(ChemicalFormula.ParseFormula("O3H-1P").MonoisotopicMass.RoundedDouble(2).Value));
+                        break;
+
+                    case ProductType.d:
+                        mass = p.GetRnaMassShiftFromProductType().RoundedDouble(2).Value;
+                        Assert.That(mass, Is.EqualTo(ChemicalFormula.ParseFormula("O4H2P").MonoisotopicMass.RoundedDouble(2).Value));
+                        break;
+
+                    case ProductType.dWaterLoss:
+                        mass = p.GetRnaMassShiftFromProductType().RoundedDouble(2).Value;
+                        Assert.That(mass, Is.EqualTo(ChemicalFormula.ParseFormula("O3P").MonoisotopicMass.RoundedDouble(2).Value));
+                        break;
+
+                    case ProductType.dBaseLoss:
+                        mass = p.GetRnaMassShiftFromProductType().RoundedDouble(2).Value;
+                        Assert.That(mass, Is.EqualTo(ChemicalFormula.ParseFormula("O4H-1P").MonoisotopicMass.RoundedDouble(2).Value));
+                        break;
+
+                    case ProductType.w:
+                        mass = p.GetRnaMassShiftFromProductType().RoundedDouble(2).Value;
+                        Assert.That(mass, Is.EqualTo(ChemicalFormula.ParseFormula("H").MonoisotopicMass.RoundedDouble(2).Value));
+                        break;
+
+                    case ProductType.wWaterLoss:
+                        mass = p.GetRnaMassShiftFromProductType().RoundedDouble(2).Value;
+                        Assert.That(mass, Is.EqualTo(ChemicalFormula.ParseFormula("H-1O-1").MonoisotopicMass.RoundedDouble(2).Value));
+                        break;
+
+                    case ProductType.xWaterLoss:
+                        mass = p.GetRnaMassShiftFromProductType().RoundedDouble(2).Value;
+                        Assert.That(mass, Is.EqualTo(ChemicalFormula.ParseFormula("O-2H-1").MonoisotopicMass.RoundedDouble(2).Value));
+                        break;
+
+                    case ProductType.yWaterLoss:
+                        mass = p.GetRnaMassShiftFromProductType().RoundedDouble(2).Value;
+                        Assert.That(mass, Is.EqualTo(ChemicalFormula.ParseFormula("O-4H-2P-1").MonoisotopicMass.RoundedDouble(2).Value));
+                        break;
+
+                    case ProductType.z:
+                        mass = p.GetRnaMassShiftFromProductType().RoundedDouble(2).Value;
+                        Assert.That(mass, Is.EqualTo(ChemicalFormula.ParseFormula("O-4P-1").MonoisotopicMass.RoundedDouble(2).Value));
+                        break;
+
+                    case ProductType.wBaseLoss:
+                        mass = p.GetRnaMassShiftFromProductType().RoundedDouble(2).Value;
+                        Assert.That(mass, Is.EqualTo(ChemicalFormula.ParseFormula("H-2").MonoisotopicMass.RoundedDouble(2).Value));
+                        break;
+                    case ProductType.xBaseLoss:
+                        mass = p.GetRnaMassShiftFromProductType().RoundedDouble(2).Value;
+                        Assert.That(mass, Is.EqualTo(ChemicalFormula.ParseFormula("O-1H-2").MonoisotopicMass.RoundedDouble(2).Value));
+                        break;
+                    case ProductType.yBaseLoss:
+                        mass = p.GetRnaMassShiftFromProductType().RoundedDouble(2).Value;
+                        Assert.That(mass, Is.EqualTo(ChemicalFormula.ParseFormula("O-3H-2P-1").MonoisotopicMass.RoundedDouble(2).Value));
+                        break;
+                    case ProductType.zBaseLoss:
+                        mass = p.GetRnaMassShiftFromProductType().RoundedDouble(2).Value;
+                        Assert.That(mass, Is.EqualTo(ChemicalFormula.ParseFormula("O-4H-3P-1").MonoisotopicMass.RoundedDouble(2).Value));
+                        break;
+                }
+            }
+        }
+
+        // Every ProductType must either report the expected terminus or make GetRnaTerminusType throw ArgumentOutOfRangeException.
+        [Test]
+        public void TestProductTypes_GetRnaTerminusType()
+        {
+            foreach (var type in Enum.GetValues<ProductType>())
+            {
+                switch (type)
+                {
+                    case ProductType.a:
+                    case ProductType.aWaterLoss:
+                    case ProductType.aBaseLoss:
+                    case ProductType.b:
+                    case ProductType.bWaterLoss:
+                    case ProductType.bBaseLoss:
+                    case ProductType.c:
+                    case ProductType.cWaterLoss:
+                    case ProductType.cBaseLoss:
+                    case ProductType.d:
+                    case ProductType.dWaterLoss:
+                    case ProductType.dBaseLoss:
+                        Assert.That(type.GetRnaTerminusType(), Is.EqualTo(FragmentationTerminus.FivePrime));
+                        break;
+
+                    case ProductType.w:
+                    case ProductType.wWaterLoss:
+                    case ProductType.wBaseLoss:
+                    case ProductType.x:
+                    case ProductType.xWaterLoss:
+                    case ProductType.xBaseLoss:
+                    case ProductType.y:
+                    case ProductType.yWaterLoss:
+                    case ProductType.yBaseLoss:
+                    case ProductType.z:
+                    case ProductType.zWaterLoss:
+                    case ProductType.zBaseLoss:
+                        Assert.That(type.GetRnaTerminusType(), Is.EqualTo(FragmentationTerminus.ThreePrime));
+                        break;
+
+                    case ProductType.M:
+                        Assert.That(type.GetRnaTerminusType(), Is.EqualTo(FragmentationTerminus.None));
+                        break;
+                    case ProductType.aStar:
+                    case ProductType.bAmmoniaLoss:
+                    case ProductType.D:
+                    case ProductType.Ycore:
+                    case ProductType.Y:
+                    case ProductType.aDegree:
+                    case ProductType.yAmmoniaLoss:
+                    case ProductType.zPlusOne:
+                    case ProductType.zDot:
+                        Assert.Throws<ArgumentOutOfRangeException>(() => type.GetRnaTerminusType());
+                        break;
+                    default: // a ProductType member was added without extending this test; name it in the failure
+                        throw new ArgumentOutOfRangeException(nameof(type), type, "ProductType is not covered by this test.");
+                }
+            }
+        }
+
+        // Each water-loss fragment must be lighter than the same-index normal fragment by the monoisotopic mass of H2O.
+        [Test]
+        [TestCase(ProductType.a, ProductType.aWaterLoss)]
+        [TestCase(ProductType.b, ProductType.bWaterLoss)]
+        [TestCase(ProductType.c, ProductType.cWaterLoss)]
+        [TestCase(ProductType.d, ProductType.dWaterLoss)]
+        [TestCase(ProductType.w, ProductType.wWaterLoss)]
+        [TestCase(ProductType.x, ProductType.xWaterLoss)]
+        [TestCase(ProductType.y, ProductType.yWaterLoss)]
+        [TestCase(ProductType.z, ProductType.zWaterLoss)]
+        public void EnsureWaterLossMassesAreCorrect(ProductType normal, ProductType waterLoss)
+        {
+            var rna = new RNA("GUACUG")
+                .Digest(new RnaDigestionParams(), new List<Modification>(), new List<Modification>())
+                .First() as OligoWithSetMods ?? throw new InvalidOperationException("Digestion did not yield an OligoWithSetMods.");
+
+            // Neutral water is two hydrogen atoms plus oxygen; summing proton masses would leave out two electrons.
+            var waterMass = ChemicalFormula.ParseFormula("H2O").MonoisotopicMass;
+            List<Product> normalFragments = rna.GetNeutralFragments(normal).ToList();
+            List<Product> waterLossFragments = rna.GetNeutralFragments(waterLoss).ToList();
+            for (var index = 0; index < waterLossFragments.Count; index++)
+            {
+                Assert.That(normalFragments[index].MonoisotopicMass,
+                    Is.EqualTo(waterLossFragments[index].MonoisotopicMass + waterMass).Within(0.01));
+            }
+        }
+    }
+}
diff --git a/mzLib/Test/Transcriptomics/TestRnase.cs b/mzLib/Test/Transcriptomics/TestRnase.cs
index db7d3e3dc..e72c12e11 100644
--- a/mzLib/Test/Transcriptomics/TestRnase.cs
+++ b/mzLib/Test/Transcriptomics/TestRnase.cs
@@ -1,5 +1,4 @@
 ﻿using NUnit.Framework;
-using Assert = NUnit.Framework.Legacy.ClassicAssert;
 using System.Diagnostics.CodeAnalysis;
 using System.IO;
 using Proteomics.ProteolyticDigestion;
@@ -16,7 +15,7 @@ internal class TestRnase
         public void TestRnaseDictionaryLoading()
         {
             var rnaseCountFromTsv = File.ReadAllLines(rnaseTsvpath).Length - 1;
-            Assert.AreEqual(RnaseDictionary.Dictionary.Count, rnaseCountFromTsv);
+            Assert.That(RnaseDictionary.Dictionary.Count, Is.EqualTo(rnaseCountFromTsv));
         }
 
         [Test]
diff --git a/mzLib/Transcriptomics/NucleicAcid.cs b/mzLib/Transcriptomics/NucleicAcid.cs
index ef6b74cf9..5a42b7d67 100644
--- a/mzLib/Transcriptomics/NucleicAcid.cs
+++ b/mzLib/Transcriptomics/NucleicAcid.cs
@@ -16,7 +16,6 @@ namespace Transcriptomics
     /// </summary>
     public abstract class NucleicAcid : INucleicAcid, IBioPolymer, IEquatable<NucleicAcid>
     {
-
         #region Static Properties
 
         /// <summary>
@@ -43,21 +42,40 @@ public abstract class NucleicAcid : INucleicAcid, IBioPolymer, IEquatable<Nuclei
 
         #region Constuctors
 
+        /// <summary>
+        /// For creating an RNA programatically
+        /// </summary>
+        /// <param name="sequence"></param>
+        /// <param name="fivePrimeTerm"></param>
+        /// <param name="threePrimeTerm"></param>
+        /// <param name="oneBasedPossibleLocalizedModifications"></param>
         protected NucleicAcid(string sequence, IHasChemicalFormula? fivePrimeTerm = null, IHasChemicalFormula? threePrimeTerm = null,
             IDictionary<int, List<Modification>>? oneBasedPossibleLocalizedModifications = null)
         {
             MonoisotopicMass = 0;
-            Length = sequence.Length;
-            _nucleicAcids = new Nucleotide[Length];
+            _nucleicAcids = new Nucleotide[sequence.Length];
             ThreePrimeTerminus = threePrimeTerm ??= DefaultThreePrimeTerminus;
             FivePrimeTerminus = fivePrimeTerm ??= DefaultFivePrimeTerminus;
             _oneBasedPossibleLocalizedModifications = oneBasedPossibleLocalizedModifications ?? new Dictionary<int, List<Modification>>();
             GeneNames = new List<Tuple<string, string>>();
 
-
             ParseSequence(sequence);
         }
 
+        /// <summary>
+        /// For Reading in from rna database
+        /// </summary>
+        /// <param name="sequence"></param>
+        /// <param name="name"></param>
+        /// <param name="identifier"></param>
+        /// <param name="organism"></param>
+        /// <param name="databaseFilePath"></param>
+        /// <param name="fivePrimeTerm"></param>
+        /// <param name="threePrimeTerm"></param>
+        /// <param name="oneBasedPossibleLocalizedModifications"></param>
+        /// <param name="isContaminant"></param>
+        /// <param name="isDecoy"></param>
+        /// <param name="additionalDatabaseFields"></param>
         protected NucleicAcid(string sequence, string name, string identifier, string organism, string databaseFilePath,
             IHasChemicalFormula? fivePrimeTerm = null, IHasChemicalFormula? threePrimeTerm = null,
             IDictionary<int, List<Modification>>? oneBasedPossibleLocalizedModifications = null,
@@ -126,10 +144,8 @@ public IHasChemicalFormula ThreePrimeTerminus
         /// <summary>
         /// Gets the number of nucleic acids in this nucleic acid polymer
         /// </summary>
-        public int Length { get; private set; }
-
+        public int Length => BaseSequence.Length;
 
-        // TODO: These interface members
         public string Name { get; }
         public string FullName => Name; // TODO: Consider if this needs to be different from the name
         public string DatabaseFilePath { get; }
@@ -257,7 +273,7 @@ public ChemicalFormula GetChemicalFormula()
 
         #region Private Methods
 
-        bool ReplaceTerminus(ref IHasChemicalFormula terminus, IHasChemicalFormula value)
+        private bool ReplaceTerminus(ref IHasChemicalFormula terminus, IHasChemicalFormula value)
         {
             if (Equals(value, terminus))
                 return false;
@@ -319,7 +335,6 @@ private bool ParseSequence(string sequence)
             }
 
             _sequence = sb.ToString();
-            Length = index;
             MonoisotopicMass += monoMass;
             Array.Resize(ref _nucleicAcids, Length);
 

From a09d90a20daf48ffde25817223e837ed96d7f407 Mon Sep 17 00:00:00 2001
From: nbollis <nbollis@comcast.net>
Date: Thu, 19 Sep 2024 17:49:43 -0500
Subject: [PATCH 03/17] Made initial tests pass

---
 mzLib/Test/Transcriptomics/TestDigestion.cs | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/mzLib/Test/Transcriptomics/TestDigestion.cs b/mzLib/Test/Transcriptomics/TestDigestion.cs
index 6b385be0f..fe315ffd6 100644
--- a/mzLib/Test/Transcriptomics/TestDigestion.cs
+++ b/mzLib/Test/Transcriptomics/TestDigestion.cs
@@ -3,6 +3,7 @@
 using System.Diagnostics.CodeAnalysis;
 using System.IO;
 using System.Linq;
+using System.Reflection.Metadata.Ecma335;
 using System.Security.Cryptography;
 using Chemistry;
 using MassSpectrometry;
@@ -304,15 +305,15 @@ public static void TestDigestionAndFragmentation(string sequence, double monoMas
                 List<Product> fragments = new();
                 digestionProduct.Fragment(DissociationType.CID, FragmentationTerminus.Both, fragments);
 
-                List<(int FragmentNumber, ProductType Type, double Mass)[]> ughh = new();
-
                 // test that fragments are correct
                 var fragmentsToCompare = DigestFragmentTestCases
                     .Where(p => p.Sequence.Equals(digestionProduct.BaseSequence)).ToList();
                 for (var i = 0; i < fragments.Count; i++)
                 {
                     var fragment = fragments[i];
-                    var theoreticalFragment = fragmentsToCompare[i];
+                    var theoreticalFragment = fragmentsToCompare.FirstOrDefault(p =>
+                        p.FragmentNumber == fragment.FragmentNumber && p.Type == fragment.ProductType);
+                    if (theoreticalFragment.Mass is 0.0 ) continue;
                     Assert.That(fragment.MonoisotopicMass, Is.EqualTo(theoreticalFragment.Mass).Within(0.01));
                     Assert.That(fragment.FragmentNumber, Is.EqualTo(theoreticalFragment.FragmentNumber));
                     Assert.That(fragment.ProductType, Is.EqualTo(theoreticalFragment.Type));

From 4dfb542544f8e336512035d80d10b53510d9abca Mon Sep 17 00:00:00 2001
From: nbollis <nbollis@comcast.net>
Date: Thu, 19 Sep 2024 18:19:31 -0500
Subject: [PATCH 04/17] Removed unnecessary namespaces

---
 mzLib/MzLibUtil/ClassExtensions.cs                    | 2 +-
 mzLib/Test/Transcriptomics/TestDigestion.cs           | 3 ---
 mzLib/Test/Transcriptomics/TestFragmentation.cs       | 3 ---
 mzLib/Test/Transcriptomics/TestProductType.cs         | 2 --
 mzLib/Transcriptomics/Digestion/NucleolyticOligo.cs   | 7 +------
 mzLib/Transcriptomics/Digestion/OligoWithSetMods.cs   | 6 ------
 mzLib/Transcriptomics/Digestion/RnaDigestionParams.cs | 6 ------
 7 files changed, 2 insertions(+), 27 deletions(-)

diff --git a/mzLib/MzLibUtil/ClassExtensions.cs b/mzLib/MzLibUtil/ClassExtensions.cs
index 05e5cfd1e..0129154a4 100644
--- a/mzLib/MzLibUtil/ClassExtensions.cs
+++ b/mzLib/MzLibUtil/ClassExtensions.cs
@@ -19,7 +19,6 @@
 using System;
 using System.Collections.Generic;
 using System.Linq;
-using System.Text;
 using System.Text.RegularExpressions;
 
 namespace MzLibUtil
@@ -123,5 +122,6 @@ public static string GetPeriodTolerantFilenameWithoutExtension(this string fileP
         {
             return PeriodTolerantFilenameWithoutExtension.GetPeriodTolerantFilenameWithoutExtension(filePath);
         }
+
     }
 }
\ No newline at end of file
diff --git a/mzLib/Test/Transcriptomics/TestDigestion.cs b/mzLib/Test/Transcriptomics/TestDigestion.cs
index fe315ffd6..bf31392ca 100644
--- a/mzLib/Test/Transcriptomics/TestDigestion.cs
+++ b/mzLib/Test/Transcriptomics/TestDigestion.cs
@@ -3,11 +3,8 @@
 using System.Diagnostics.CodeAnalysis;
 using System.IO;
 using System.Linq;
-using System.Reflection.Metadata.Ecma335;
-using System.Security.Cryptography;
 using Chemistry;
 using MassSpectrometry;
-using MathNet.Numerics.Distributions;
 using NUnit.Framework;
 using Omics.Digestion;
 using Omics.Fragmentation;
diff --git a/mzLib/Test/Transcriptomics/TestFragmentation.cs b/mzLib/Test/Transcriptomics/TestFragmentation.cs
index 6086ecb70..8f7bb2d78 100644
--- a/mzLib/Test/Transcriptomics/TestFragmentation.cs
+++ b/mzLib/Test/Transcriptomics/TestFragmentation.cs
@@ -3,9 +3,6 @@
 using System.Collections.Generic;
 using System.Diagnostics.CodeAnalysis;
 using System.Linq;
-using System.Runtime.CompilerServices;
-using System.Text;
-using System.Threading.Tasks;
 using Transcriptomics;
 using MassSpectrometry;
 using Omics.Fragmentation;
diff --git a/mzLib/Test/Transcriptomics/TestProductType.cs b/mzLib/Test/Transcriptomics/TestProductType.cs
index be9dc2f93..f9c459211 100644
--- a/mzLib/Test/Transcriptomics/TestProductType.cs
+++ b/mzLib/Test/Transcriptomics/TestProductType.cs
@@ -2,8 +2,6 @@
 using System.Collections.Generic;
 using System.Diagnostics.CodeAnalysis;
 using System.Linq;
-using System.Text;
-using System.Threading.Tasks;
 using Chemistry;
 using MassSpectrometry;
 using NUnit.Framework;
diff --git a/mzLib/Transcriptomics/Digestion/NucleolyticOligo.cs b/mzLib/Transcriptomics/Digestion/NucleolyticOligo.cs
index a741638c5..3a7382e7c 100644
--- a/mzLib/Transcriptomics/Digestion/NucleolyticOligo.cs
+++ b/mzLib/Transcriptomics/Digestion/NucleolyticOligo.cs
@@ -1,9 +1,4 @@
-﻿using System;
-using System.Collections.Generic;
-using System.Linq;
-using System.Text;
-using System.Threading.Tasks;
-using Chemistry;
+﻿using Chemistry;
 using Omics.Digestion;
 using Omics.Modifications;
 
diff --git a/mzLib/Transcriptomics/Digestion/OligoWithSetMods.cs b/mzLib/Transcriptomics/Digestion/OligoWithSetMods.cs
index 92b5e501c..966f97c50 100644
--- a/mzLib/Transcriptomics/Digestion/OligoWithSetMods.cs
+++ b/mzLib/Transcriptomics/Digestion/OligoWithSetMods.cs
@@ -4,12 +4,6 @@
 using Omics.Fragmentation;
 using Omics.Modifications;
 using Omics;
-using System;
-using System.Collections.Generic;
-using System.Linq;
-using System.Security.Cryptography;
-using System.Text;
-using System.Threading.Tasks;
 using Easy.Common.Extensions;
 using Omics.Fragmentation.Oligo;
 
diff --git a/mzLib/Transcriptomics/Digestion/RnaDigestionParams.cs b/mzLib/Transcriptomics/Digestion/RnaDigestionParams.cs
index 379e48fa9..fb80a1a0b 100644
--- a/mzLib/Transcriptomics/Digestion/RnaDigestionParams.cs
+++ b/mzLib/Transcriptomics/Digestion/RnaDigestionParams.cs
@@ -1,10 +1,5 @@
 ﻿using Omics.Digestion;
 using Omics.Fragmentation;
-using System;
-using System.Collections.Generic;
-using System.Linq;
-using System.Text;
-using System.Threading.Tasks;
 
 namespace Transcriptomics.Digestion
 {
@@ -12,7 +7,6 @@ public class RnaDigestionParams : IDigestionParams
     {
 
         // this parameterless constructor needs to exist to read the toml.
-        // if you can figure out a way to get rid of it, feel free...
         public RnaDigestionParams() : this("top-down")
         {
         }

From 2f6b6cb975679c1e9856cec28a26914079b6c312 Mon Sep 17 00:00:00 2001
From: nbollis <nbollis@comcast.net>
Date: Thu, 19 Sep 2024 19:17:43 -0500
Subject: [PATCH 05/17] Expanded test coverage

---
 mzLib/Test/Transcriptomics/TestDigestion.cs   | 26 +++++++++++++++++++
 .../Test/Transcriptomics/TestFragmentation.cs |  4 +--
 mzLib/Test/Transcriptomics/TestNucleicAcid.cs |  6 ++---
 mzLib/Test/Transcriptomics/TestNucleotide.cs  |  6 ++---
 mzLib/Test/Transcriptomics/TestRnase.cs       |  2 +-
 5 files changed, 35 insertions(+), 9 deletions(-)

diff --git a/mzLib/Test/Transcriptomics/TestDigestion.cs b/mzLib/Test/Transcriptomics/TestDigestion.cs
index bf31392ca..d210b6a2e 100644
--- a/mzLib/Test/Transcriptomics/TestDigestion.cs
+++ b/mzLib/Test/Transcriptomics/TestDigestion.cs
@@ -555,6 +555,32 @@ public void TestDigestionParams_Properties(RnaDigestionTestCase testCase)
             Assert.That(digestionProducts.Count(), Is.EqualTo(testCase.DigestionProductCount));
         }
 
+        // Checks Clone: with a terminus argument only FragmentationTerminus differs; with no argument every compared value matches.
+        [Test]
+        public void TestDigestionParamsClone()
+        {
+            var digestionParams = new RnaDigestionParams("top-down", 0, 3, 20000);
+            var cloned = digestionParams.Clone(FragmentationTerminus.C);
+            // Clone with a new terminus: every compared value except FragmentationTerminus equals the original's
+            Assert.That(digestionParams.DigestionAgent, Is.EqualTo(cloned.DigestionAgent));
+            Assert.That(digestionParams.MaxMissedCleavages, Is.EqualTo(cloned.MaxMissedCleavages));
+            Assert.That(digestionParams.MinLength, Is.EqualTo(cloned.MinLength));
+            Assert.That(digestionParams.MaxLength, Is.EqualTo(cloned.MaxLength));
+            Assert.That(digestionParams.MaxMods, Is.EqualTo(cloned.MaxMods));
+            Assert.That(digestionParams.FragmentationTerminus, Is.Not.EqualTo(cloned.FragmentationTerminus));
+            Assert.That(cloned.FragmentationTerminus, Is.EqualTo(FragmentationTerminus.C));
+
+            // Clone without an argument: every compared value, including the terminus (Both here), equals the original's
+            cloned = digestionParams.Clone();
+            Assert.That(digestionParams.DigestionAgent, Is.EqualTo(cloned.DigestionAgent));
+            Assert.That(digestionParams.MaxMissedCleavages, Is.EqualTo(cloned.MaxMissedCleavages));
+            Assert.That(digestionParams.MinLength, Is.EqualTo(cloned.MinLength));
+            Assert.That(digestionParams.MaxLength, Is.EqualTo(cloned.MaxLength));
+            Assert.That(digestionParams.MaxMods, Is.EqualTo(cloned.MaxMods));
+            Assert.That(digestionParams.FragmentationTerminus, Is.EqualTo(cloned.FragmentationTerminus));
+            Assert.That(cloned.FragmentationTerminus, Is.EqualTo(FragmentationTerminus.Both));
+        }
+
         #endregion
 
         #region NucleicAcid
diff --git a/mzLib/Test/Transcriptomics/TestFragmentation.cs b/mzLib/Test/Transcriptomics/TestFragmentation.cs
index 8f7bb2d78..fea764246 100644
--- a/mzLib/Test/Transcriptomics/TestFragmentation.cs
+++ b/mzLib/Test/Transcriptomics/TestFragmentation.cs
@@ -15,10 +15,10 @@ namespace Test.Transcriptomics
 {
     [TestFixture]
     [ExcludeFromCodeCoverage]
-    internal class TestFragmentation
+    public class TestFragmentation
     {
 
-        internal static IEnumerable<TestNucleicAcid.SixmerTestCase> GetSixMerIndividualFragmentTypeTestCases() =>
+        public static IEnumerable<TestNucleicAcid.SixmerTestCase> GetSixMerIndividualFragmentTypeTestCases() =>
             TestNucleicAcid.GetSixmerIndividualFragmentTypeTestCases();
 
         [Test]
diff --git a/mzLib/Test/Transcriptomics/TestNucleicAcid.cs b/mzLib/Test/Transcriptomics/TestNucleicAcid.cs
index efbf05020..a0c5619c9 100644
--- a/mzLib/Test/Transcriptomics/TestNucleicAcid.cs
+++ b/mzLib/Test/Transcriptomics/TestNucleicAcid.cs
@@ -16,11 +16,11 @@ namespace Test.Transcriptomics
     /// </summary>
     [TestFixture]
     [ExcludeFromCodeCoverage]
-    internal class TestNucleicAcid
+    public class TestNucleicAcid
     {
-        internal record SixmerTestCase(string Sequence, ProductType Type, double[] NeutralMasses, string[] ChemicalFormulas);
+        public record SixmerTestCase(string Sequence, ProductType Type, double[] NeutralMasses, string[] ChemicalFormulas);
 
-        internal static IEnumerable<SixmerTestCase> GetSixmerIndividualFragmentTypeTestCases()
+        public static IEnumerable<SixmerTestCase> GetSixmerIndividualFragmentTypeTestCases()
         {
             Loaders.LoadElements();
 
diff --git a/mzLib/Test/Transcriptomics/TestNucleotide.cs b/mzLib/Test/Transcriptomics/TestNucleotide.cs
index df250fd40..277ebc3d6 100644
--- a/mzLib/Test/Transcriptomics/TestNucleotide.cs
+++ b/mzLib/Test/Transcriptomics/TestNucleotide.cs
@@ -9,12 +9,12 @@
 namespace Test.Transcriptomics
 {
     [ExcludeFromCodeCoverage]
-    internal class TestNucleotide
+    public class TestNucleotide
     {
-        internal record NucleotideTestCase(Nucleotide Nucleotide, string Name, char OneLetterCode, string Symbol, ChemicalFormula Formula, double Mass,
+        public record NucleotideTestCase(Nucleotide Nucleotide, string Name, char OneLetterCode, string Symbol, ChemicalFormula Formula, double Mass,
             ChemicalFormula nucleosideFormula);
 
-        internal static IEnumerable<NucleotideTestCase> GetNucleotideTestCases()
+        public static IEnumerable<NucleotideTestCase> GetNucleotideTestCases()
         {
             Loaders.LoadElements();
 
diff --git a/mzLib/Test/Transcriptomics/TestRnase.cs b/mzLib/Test/Transcriptomics/TestRnase.cs
index e72c12e11..b122f32bd 100644
--- a/mzLib/Test/Transcriptomics/TestRnase.cs
+++ b/mzLib/Test/Transcriptomics/TestRnase.cs
@@ -7,7 +7,7 @@
 namespace Test.Transcriptomics
 {
     [ExcludeFromCodeCoverage]
-    internal class TestRnase
+    public class TestRnase
     {
         public static string rnaseTsvpath = Path.Combine(TestContext.CurrentContext.TestDirectory, @"Digestion\rnases.tsv");
 

From c9041b09684b53899d37e676689ca07daffa285a Mon Sep 17 00:00:00 2001
From: Nic Bollis <nbollis@wisc.edu>
Date: Tue, 24 Sep 2024 14:30:48 -0500
Subject: [PATCH 06/17] Responded to Alex Comments

---
 .../Oligo/DissociationTypeCollection.cs       | 262 +++++++++++++++++-
 .../Digestion/NucleolyticOligo.cs             |  16 +-
 .../Digestion/OligoWithSetMods.cs             |  54 ++--
 mzLib/Transcriptomics/NucleicAcid.cs          |  43 +--
 4 files changed, 313 insertions(+), 62 deletions(-)

diff --git a/mzLib/Omics/Fragmentation/Oligo/DissociationTypeCollection.cs b/mzLib/Omics/Fragmentation/Oligo/DissociationTypeCollection.cs
index 3bc08d089..7b5a411ee 100644
--- a/mzLib/Omics/Fragmentation/Oligo/DissociationTypeCollection.cs
+++ b/mzLib/Omics/Fragmentation/Oligo/DissociationTypeCollection.cs
@@ -1,4 +1,258 @@
-﻿using Chemistry;using MassSpectrometry;namespace Omics.Fragmentation.Oligo{    /// <summary>    /// Methods dealing with specific product type for RNA molecules    /// </summary>    public static class DissociationTypeCollection    {
-        /// <summary>        /// Product Ion types by dissociation method        /// </summary>        /// <remarks>        /// HCD ions were taken from the following paper: https://www.nature.com/articles/s41598-023-36193-2        /// Ion types below here should be validated with experimental results.        /// Base and water losses occur very frequently and may also be present in these activation types.        /// CID, UVPD, and aEPD ions were taken from the following paper: https://pubs.acs.org/doi/10.1021/acs.analchem.3c05428?ref=PDF        /// NETD ions were taken from the following paper: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7161943/        /// lowCID ions were taken from this Thermo Poster: https://assets.thermofisher.com/TFS-Assets/CMD/Flyers/fl-489263-asms23-optimized-fragmentation-oligonucleotides-suppresses-undesired-fragmentation-fl489263-en.pdf        /// </remarks>        public static Dictionary<DissociationType, List<ProductType>> ProductsFromDissociationType =            new Dictionary<DissociationType, List<ProductType>>()            {                { DissociationType.Unknown, new List<ProductType>() },                { DissociationType.Custom, new List<ProductType>() },                {                    DissociationType.AnyActivationType, new List<ProductType>                    {                        ProductType.a, ProductType.aBaseLoss, ProductType.aWaterLoss,                        ProductType.b, ProductType.bBaseLoss, ProductType.bWaterLoss,                        ProductType.c, ProductType.cBaseLoss, ProductType.cWaterLoss,                        ProductType.d, ProductType.dBaseLoss, ProductType.dWaterLoss,                        ProductType.w, ProductType.wBaseLoss, ProductType.wWaterLoss,                        ProductType.x, ProductType.xBaseLoss, ProductType.xWaterLoss,                        ProductType.y, ProductType.yBaseLoss, ProductType.yWaterLoss,                        ProductType.z, 
ProductType.zBaseLoss, ProductType.zWaterLoss,                        ProductType.M                    }                },                {                    DissociationType.CID, new List<ProductType>                    {                        ProductType.a, ProductType.aBaseLoss, ProductType.c, ProductType.dWaterLoss, ProductType.w,                        ProductType.y, ProductType.yWaterLoss, ProductType.M                    }                },                {                    DissociationType.HCD, new List<ProductType>                    {                        ProductType.a, ProductType.aBaseLoss, ProductType.b, ProductType.c, ProductType.d,                        ProductType.dWaterLoss, ProductType.w, ProductType.x, ProductType.y, ProductType.z,                        ProductType.M                    }                },                {                    DissociationType.UVPD, new List<ProductType>                    {                        ProductType.a, ProductType.c, ProductType.d, ProductType.w, ProductType.M                    }                },                {                    DissociationType.aEPD, new List<ProductType>                    {                        ProductType.a, ProductType.c, ProductType.d, ProductType.w, ProductType.x, ProductType.z, ProductType.M                    }                },                {                    DissociationType.NETD, new List<ProductType>                    {                        ProductType.w, ProductType.d, ProductType.M                    }                },                {                    DissociationType.LowCID, new List<ProductType>()                    {                        ProductType.aBaseLoss, ProductType.c, ProductType.dWaterLoss, ProductType.w,                        ProductType.y, ProductType.yWaterLoss, ProductType.M                    }                },                { DissociationType.IRMPD, new List<ProductType>() { } },     
           { DissociationType.ECD, new List<ProductType> { } },                { DissociationType.PQD, new List<ProductType> { } },                { DissociationType.ETD, new List<ProductType> { } },                { DissociationType.EThcD, new List<ProductType> { } },            };
-
-        /// <summary>        /// Returns all dissociation types with implemented product type collections        /// </summary>        public static IEnumerable<DissociationType> AllImplementedDissociationTypes =>            ProductsFromDissociationType.Where(p => p.Value.Any())                .Select(p => p.Key);        /// <summary>        /// Returns list of products types based upon the dissociation type        /// </summary>        /// <param name="dissociationType"></param>        /// <returns></returns>                                                                                                                                                                                                                     public static List<ProductType> GetRnaProductTypesFromDissociationType(this DissociationType dissociationType) =>            ProductsFromDissociationType[dissociationType];        /// <summary>        /// Mass to be added or subtracted        /// </summary>        private static readonly Dictionary<ProductType, ChemicalFormula> FragmentIonCaps =            new Dictionary<ProductType, ChemicalFormula>            {                { ProductType.a, ChemicalFormula.ParseFormula("H") },                { ProductType.aWaterLoss, ChemicalFormula.ParseFormula("H-1O-1") },                { ProductType.b, ChemicalFormula.ParseFormula("OH") },                { ProductType.bWaterLoss, ChemicalFormula.ParseFormula("H-1") },                { ProductType.c, ChemicalFormula.ParseFormula("O3H2P") },                { ProductType.cWaterLoss, ChemicalFormula.ParseFormula("O2P") },                { ProductType.d, ChemicalFormula.ParseFormula("O4H2P") },                { ProductType.dWaterLoss, ChemicalFormula.ParseFormula("O3P") },                { ProductType.w, ChemicalFormula.ParseFormula("H") },                { ProductType.wWaterLoss, ChemicalFormula.ParseFormula("H-1O-1") },                { ProductType.x, 
ChemicalFormula.ParseFormula("O-1H") },                { ProductType.xWaterLoss, ChemicalFormula.ParseFormula("O-2H-1") },                { ProductType.y, ChemicalFormula.ParseFormula("O-3P-1") },                { ProductType.yWaterLoss, ChemicalFormula.ParseFormula("O-4H-2P-1") },                { ProductType.z, ChemicalFormula.ParseFormula("O-4P-1") },                { ProductType.zWaterLoss, ChemicalFormula.ParseFormula("O-5H-2P-1") },                //fragment - Base chemical formula is the corresponding fragment chemical formula subtracing 1 H as H is lost when base is removed                { ProductType.aBaseLoss, ChemicalFormula.ParseFormula("H-2") }, // "H-1" -H                 { ProductType.bBaseLoss, ChemicalFormula.ParseFormula("O1H-2") }, //"OH1" -H                { ProductType.cBaseLoss, ChemicalFormula.ParseFormula("O3H-1P") }, //"O3P" -H                { ProductType.dBaseLoss, ChemicalFormula.ParseFormula("O4H-1P") }, //"O4H2P" -H                { ProductType.wBaseLoss, ChemicalFormula.ParseFormula("H-2") }, //"H"-H                { ProductType.xBaseLoss, ChemicalFormula.ParseFormula("O-1H-2") }, //"O-1H" -H                { ProductType.yBaseLoss, ChemicalFormula.ParseFormula("O-3H-2P-1") }, //"O-3P-1" -H                { ProductType.zBaseLoss, ChemicalFormula.ParseFormula("O-4H-3P-1") }, //"O-4H-1P-1" -1                { ProductType.M, new ChemicalFormula() }            };        /// <summary>        /// Returns mass shift by product type        /// </summary>        /// <param name="type"></param>        /// <returns></returns>        public static double GetRnaMassShiftFromProductType(this ProductType type) => FragmentIonCaps[type].MonoisotopicMass;        public static FragmentationTerminus GetRnaTerminusType(this ProductType fragmentType)        {            switch (fragmentType)            {                case ProductType.a:                case ProductType.aWaterLoss:                case ProductType.aBaseLoss:
                case ProductType.b:                case ProductType.bWaterLoss:                case ProductType.bBaseLoss:                case ProductType.c:                case ProductType.cWaterLoss:                case ProductType.cBaseLoss:                case ProductType.d:                case ProductType.dWaterLoss:                case ProductType.dBaseLoss:                    return FragmentationTerminus.FivePrime;                case ProductType.w:                case ProductType.wWaterLoss:                case ProductType.wBaseLoss:                case ProductType.x:                case ProductType.xWaterLoss:                case ProductType.xBaseLoss:                case ProductType.y:                case ProductType.yWaterLoss:                case ProductType.yBaseLoss:                case ProductType.z:                case ProductType.zWaterLoss:                case ProductType.zBaseLoss:                    return FragmentationTerminus.ThreePrime;                case ProductType.M:                    return FragmentationTerminus.None;                case ProductType.aStar:                case ProductType.aDegree:                case ProductType.bAmmoniaLoss:                case ProductType.yAmmoniaLoss:                case ProductType.zPlusOne:                case ProductType.D:                case ProductType.Ycore:                case ProductType.Y:                default:                    throw new ArgumentOutOfRangeException(nameof(fragmentType), fragmentType, null);            }        }        /// <summary>        /// Product ion types by Fragmentation Terminus        /// </summary>        private static readonly Dictionary<FragmentationTerminus, List<ProductType>>            ProductIonTypesFromSpecifiedTerminus = new Dictionary<FragmentationTerminus, List<ProductType>>            {                {                    FragmentationTerminus.FivePrime, new List<ProductType>                    {     
                   ProductType.a, ProductType.aWaterLoss, ProductType.aBaseLoss,                        ProductType.b, ProductType.bWaterLoss, ProductType.bBaseLoss,                        ProductType.c, ProductType.cWaterLoss, ProductType.cBaseLoss,                        ProductType.d, ProductType.dWaterLoss, ProductType.dBaseLoss,                     }                },                {                    FragmentationTerminus.ThreePrime, new List<ProductType>                    {                        ProductType.w, ProductType.wWaterLoss, ProductType.wBaseLoss,                        ProductType.x, ProductType.xWaterLoss, ProductType.xBaseLoss,                        ProductType.y, ProductType.yWaterLoss, ProductType.yBaseLoss,                        ProductType.z, ProductType.zWaterLoss, ProductType.zBaseLoss,                    }                },                {                    FragmentationTerminus.Both, new List<ProductType>                    {                        ProductType.a, ProductType.aWaterLoss, ProductType.aBaseLoss,                        ProductType.b, ProductType.bWaterLoss, ProductType.bBaseLoss,                        ProductType.c, ProductType.cWaterLoss, ProductType.cBaseLoss,                        ProductType.d, ProductType.dWaterLoss, ProductType.dBaseLoss,                         ProductType.w, ProductType.wWaterLoss, ProductType.wBaseLoss,                        ProductType.x, ProductType.xWaterLoss, ProductType.xBaseLoss,                        ProductType.y, ProductType.yWaterLoss, ProductType.yBaseLoss,                        ProductType.z, ProductType.zWaterLoss, ProductType.zBaseLoss,                        ProductType.M                    }                }            };        public static List<ProductType> GetRnaTerminusSpecificProductTypes(            this FragmentationTerminus fragmentationTerminus)        {            return 
ProductIonTypesFromSpecifiedTerminus[fragmentationTerminus];        }        /// <summary>        /// Returns all product ion types based upon specified terminus        /// </summary>        /// <param name="dissociationType"></param>        /// <param name="fragmentationTerminus"></param>        /// <returns></returns>        public static List<ProductType> GetRnaTerminusSpecificProductTypesFromDissociation(            this DissociationType dissociationType, FragmentationTerminus fragmentationTerminus)        {            var terminusSpecific = fragmentationTerminus.GetRnaTerminusSpecificProductTypes();            var dissociationSpecific = dissociationType.GetRnaProductTypesFromDissociationType();            return terminusSpecific.Intersect(dissociationSpecific).ToList();        }    }}
\ No newline at end of file
+﻿using Chemistry;
+using MassSpectrometry;
+
+namespace Omics.Fragmentation.Oligo
+{
+    /// <summary>
+    /// Methods dealing with specific product type for RNA molecules
+    /// </summary>
+    public static class DissociationTypeCollection
+    {
+        /// <summary>
+        /// Product Ion types by dissociation method
+        /// </summary>
+        /// <remarks>
+        /// HCD ions were taken from the following paper: https://www.nature.com/articles/s41598-023-36193-2
+        /// Ion types below here should be validated with experimental results.
+        /// Base and water losses occur very frequently and may also be present in these activation types.
+        /// CID, UVPD, and aEPD ions were taken from the following paper: https://pubs.acs.org/doi/10.1021/acs.analchem.3c05428?ref=PDF
+        /// NETD ions were taken from the following paper: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7161943/
+        /// lowCID ions were taken from this Thermo Poster: https://assets.thermofisher.com/TFS-Assets/CMD/Flyers/fl-489263-asms23-optimized-fragmentation-oligonucleotides-suppresses-undesired-fragmentation-fl489263-en.pdf
+        /// </remarks>
+        public static Dictionary<DissociationType, List<ProductType>> ProductsFromDissociationType =
+            new Dictionary<DissociationType, List<ProductType>>()
+            {
+                { DissociationType.Unknown, new List<ProductType>() },
+                { DissociationType.Custom, new List<ProductType>() },
+                {
+                    DissociationType.AnyActivationType, new List<ProductType>
+                    {
+                        ProductType.a, ProductType.aBaseLoss, ProductType.aWaterLoss,
+                        ProductType.b, ProductType.bBaseLoss, ProductType.bWaterLoss,
+                        ProductType.c, ProductType.cBaseLoss, ProductType.cWaterLoss,
+                        ProductType.d, ProductType.dBaseLoss, ProductType.dWaterLoss,
+                        ProductType.w, ProductType.wBaseLoss, ProductType.wWaterLoss,
+                        ProductType.x, ProductType.xBaseLoss, ProductType.xWaterLoss,
+                        ProductType.y, ProductType.yBaseLoss, ProductType.yWaterLoss,
+                        ProductType.z, ProductType.zBaseLoss, ProductType.zWaterLoss,
+                        ProductType.M
+                    }
+                },
+                {
+                    DissociationType.CID, new List<ProductType>
+                    {
+                        ProductType.a, ProductType.aBaseLoss, ProductType.c, ProductType.dWaterLoss, ProductType.w,
+                        ProductType.y, ProductType.yWaterLoss, ProductType.M
+                    }
+                },
+                {
+                    DissociationType.HCD, new List<ProductType>
+                    {
+                        ProductType.a, ProductType.aBaseLoss, ProductType.b, ProductType.c, ProductType.d,
+                        ProductType.dWaterLoss, ProductType.w, ProductType.x, ProductType.y, ProductType.z,
+                        ProductType.M
+                    }
+                },
+                {
+                    DissociationType.UVPD, new List<ProductType>
+                    {
+                        ProductType.a, ProductType.c, ProductType.d, ProductType.w, ProductType.M
+                    }
+                },
+                {
+                    DissociationType.aEPD, new List<ProductType>
+                    {
+                        ProductType.a, ProductType.c, ProductType.d, ProductType.w, ProductType.x, ProductType.z, ProductType.M
+                    }
+                },
+                {
+                    DissociationType.NETD, new List<ProductType>
+                    {
+                        ProductType.w, ProductType.d, ProductType.M
+                    }
+                },
+                {
+                    DissociationType.LowCID, new List<ProductType>()
+                    {
+                        ProductType.aBaseLoss, ProductType.c, ProductType.dWaterLoss, ProductType.w,
+                        ProductType.y, ProductType.yWaterLoss, ProductType.M
+                    }
+                },
+                { DissociationType.IRMPD, new List<ProductType>() { } },
+                { DissociationType.ECD, new List<ProductType> { } },
+                { DissociationType.PQD, new List<ProductType> { } },
+                { DissociationType.ETD, new List<ProductType> { } },
+                { DissociationType.EThcD, new List<ProductType> { } },
+            };
+
+        /// <summary>
+        /// Returns all dissociation types with implemented product type collections
+        /// </summary>
+        public static IEnumerable<DissociationType> AllImplementedDissociationTypes =>
+            ProductsFromDissociationType.Where(p => p.Value.Any())
+                .Select(p => p.Key);
+
+        /// <summary>
+        /// Returns the list of product types registered for the given dissociation type
+        /// </summary>
+        /// <param name="dissociationType">Dissociation type used as the key into <see cref="ProductsFromDissociationType"/></param>
+        /// <returns>The list stored in the dictionary for that key (the stored instance itself, not a copy)</returns>
+        public static List<ProductType> GetRnaProductTypesFromDissociationType(this DissociationType dissociationType) =>
+            ProductsFromDissociationType[dissociationType];
+
+
+        /// <summary>
+        /// Mass to be added or subtracted
+        /// </summary>
+        private static readonly Dictionary<ProductType, ChemicalFormula> FragmentIonCaps =
+            new Dictionary<ProductType, ChemicalFormula>
+            {
+                { ProductType.a, ChemicalFormula.ParseFormula("H") },
+                { ProductType.aWaterLoss, ChemicalFormula.ParseFormula("H-1O-1") },
+                { ProductType.b, ChemicalFormula.ParseFormula("OH") },
+                { ProductType.bWaterLoss, ChemicalFormula.ParseFormula("H-1") },
+                { ProductType.c, ChemicalFormula.ParseFormula("O3H2P") },
+                { ProductType.cWaterLoss, ChemicalFormula.ParseFormula("O2P") },
+                { ProductType.d, ChemicalFormula.ParseFormula("O4H2P") },
+                { ProductType.dWaterLoss, ChemicalFormula.ParseFormula("O3P") },
+
+                { ProductType.w, ChemicalFormula.ParseFormula("H") },
+                { ProductType.wWaterLoss, ChemicalFormula.ParseFormula("H-1O-1") },
+                { ProductType.x, ChemicalFormula.ParseFormula("O-1H") },
+                { ProductType.xWaterLoss, ChemicalFormula.ParseFormula("O-2H-1") },
+                { ProductType.y, ChemicalFormula.ParseFormula("O-3P-1") },
+                { ProductType.yWaterLoss, ChemicalFormula.ParseFormula("O-4H-2P-1") },
+                { ProductType.z, ChemicalFormula.ParseFormula("O-4P-1") },
+                { ProductType.zWaterLoss, ChemicalFormula.ParseFormula("O-5H-2P-1") },
+                //fragment - Base chemical formula is the corresponding fragment chemical formula subtracting 1 H as H is lost when base is removed
+                { ProductType.aBaseLoss, ChemicalFormula.ParseFormula("H-2") }, // "H-1" -H 
+                { ProductType.bBaseLoss, ChemicalFormula.ParseFormula("O1H-2") }, //"OH1" -H
+                { ProductType.cBaseLoss, ChemicalFormula.ParseFormula("O3H-1P") }, //"O3P" -H
+                { ProductType.dBaseLoss, ChemicalFormula.ParseFormula("O4H-1P") }, //"O4H2P" -H
+
+                { ProductType.wBaseLoss, ChemicalFormula.ParseFormula("H-2") }, //"H"-H
+                { ProductType.xBaseLoss, ChemicalFormula.ParseFormula("O-1H-2") }, //"O-1H" -H
+                { ProductType.yBaseLoss, ChemicalFormula.ParseFormula("O-3H-2P-1") }, //"O-3P-1" -H
+                { ProductType.zBaseLoss, ChemicalFormula.ParseFormula("O-4H-3P-1") }, //"O-4H-1P-1" -1
+
+                { ProductType.M, new ChemicalFormula() }
+            };
+
+        /// <summary>
+        /// Returns the mass shift for a product type: the monoisotopic mass of its entry in FragmentIonCaps
+        /// </summary>
+        /// <param name="type">Product type used as the key into the FragmentIonCaps dictionary</param>
+        /// <returns>The MonoisotopicMass of the ChemicalFormula stored for <paramref name="type"/></returns>
+        public static double GetRnaMassShiftFromProductType(this ProductType type) => FragmentIonCaps[type].MonoisotopicMass;
+
+        public static FragmentationTerminus GetRnaTerminusType(this ProductType fragmentType)
+        {
+            switch (fragmentType)
+            {
+                case ProductType.a:
+                case ProductType.aWaterLoss:
+                case ProductType.aBaseLoss:
+                case ProductType.b:
+                case ProductType.bWaterLoss:
+                case ProductType.bBaseLoss:
+                case ProductType.c:
+                case ProductType.cWaterLoss:
+                case ProductType.cBaseLoss:
+                case ProductType.d:
+                case ProductType.dWaterLoss:
+                case ProductType.dBaseLoss:
+                    return FragmentationTerminus.FivePrime;
+
+                case ProductType.w:
+                case ProductType.wWaterLoss:
+                case ProductType.wBaseLoss:
+                case ProductType.x:
+                case ProductType.xWaterLoss:
+                case ProductType.xBaseLoss:
+                case ProductType.y:
+                case ProductType.yWaterLoss:
+                case ProductType.yBaseLoss:
+                case ProductType.z:
+                case ProductType.zWaterLoss:
+                case ProductType.zBaseLoss:
+                    return FragmentationTerminus.ThreePrime;
+
+                case ProductType.M:
+                    return FragmentationTerminus.None;
+
+                case ProductType.aStar:
+                case ProductType.aDegree:
+                case ProductType.bAmmoniaLoss:
+                case ProductType.yAmmoniaLoss:
+                case ProductType.zPlusOne:
+                case ProductType.D:
+                case ProductType.Ycore:
+                case ProductType.Y:
+                default:
+                    throw new ArgumentOutOfRangeException(nameof(fragmentType), fragmentType, null);
+            }
+        }
+
+        /// <summary>
+        /// Product ion types by Fragmentation Terminus
+        /// </summary>
+        private static readonly Dictionary<FragmentationTerminus, List<ProductType>>
+            ProductIonTypesFromSpecifiedTerminus = new Dictionary<FragmentationTerminus, List<ProductType>>
+            {
+                {
+                    FragmentationTerminus.FivePrime, new List<ProductType>
+                    {
+                        ProductType.a, ProductType.aWaterLoss, ProductType.aBaseLoss,
+                        ProductType.b, ProductType.bWaterLoss, ProductType.bBaseLoss,
+                        ProductType.c, ProductType.cWaterLoss, ProductType.cBaseLoss,
+                        ProductType.d, ProductType.dWaterLoss, ProductType.dBaseLoss, 
+                    }
+                },
+                {
+                    FragmentationTerminus.ThreePrime, new List<ProductType>
+                    {
+                        ProductType.w, ProductType.wWaterLoss, ProductType.wBaseLoss,
+                        ProductType.x, ProductType.xWaterLoss, ProductType.xBaseLoss,
+                        ProductType.y, ProductType.yWaterLoss, ProductType.yBaseLoss,
+                        ProductType.z, ProductType.zWaterLoss, ProductType.zBaseLoss,
+                    }
+                },
+                {
+                    FragmentationTerminus.Both, new List<ProductType>
+                    {
+
+                        ProductType.a, ProductType.aWaterLoss, ProductType.aBaseLoss,
+                        ProductType.b, ProductType.bWaterLoss, ProductType.bBaseLoss,
+                        ProductType.c, ProductType.cWaterLoss, ProductType.cBaseLoss,
+                        ProductType.d, ProductType.dWaterLoss, ProductType.dBaseLoss, 
+                        ProductType.w, ProductType.wWaterLoss, ProductType.wBaseLoss,
+                        ProductType.x, ProductType.xWaterLoss, ProductType.xBaseLoss,
+                        ProductType.y, ProductType.yWaterLoss, ProductType.yBaseLoss,
+                        ProductType.z, ProductType.zWaterLoss, ProductType.zBaseLoss,
+                        ProductType.M
+                    }
+                }
+            };
+
+
+        public static List<ProductType> GetRnaTerminusSpecificProductTypes(
+            this FragmentationTerminus fragmentationTerminus)
+        {
+            return ProductIonTypesFromSpecifiedTerminus[fragmentationTerminus];
+        }
+
+        /// <summary>
+        /// Returns the product ion types that belong to both the specified terminus and the specified dissociation type
+        /// </summary>
+        /// <param name="dissociationType">Dissociation type whose product types are fetched with GetRnaProductTypesFromDissociationType</param>
+        /// <param name="fragmentationTerminus">Terminus whose product types are fetched with GetRnaTerminusSpecificProductTypes</param>
+        /// <returns>A new list holding the intersection of the two lookups, with the terminus-specific list as the first sequence</returns>
+        public static List<ProductType> GetRnaTerminusSpecificProductTypesFromDissociation(
+            this DissociationType dissociationType, FragmentationTerminus fragmentationTerminus)
+        {
+            var terminusSpecific = fragmentationTerminus.GetRnaTerminusSpecificProductTypes();
+            var dissociationSpecific = dissociationType.GetRnaProductTypesFromDissociationType();
+            return terminusSpecific.Intersect(dissociationSpecific).ToList();
+        }
+    }
+}
diff --git a/mzLib/Transcriptomics/Digestion/NucleolyticOligo.cs b/mzLib/Transcriptomics/Digestion/NucleolyticOligo.cs
index 3a7382e7c..7f98597a4 100644
--- a/mzLib/Transcriptomics/Digestion/NucleolyticOligo.cs
+++ b/mzLib/Transcriptomics/Digestion/NucleolyticOligo.cs
@@ -32,7 +32,18 @@ public override string ToString()
             return BaseSequence;
         }
 
-        internal IEnumerable<OligoWithSetMods> GetModifiedOligos(IEnumerable<Modification> allKnownFixedMods,
+        /// <summary>
+        /// Generates a collection of oligos with set modifications based on the provided fixed and variable modifications,
+        /// digestion parameters, and the nucleic acid sequence.
+        /// </summary>
+        /// <param name="allKnownFixedMods">A collection of all known fixed modifications.</param>
+        /// <param name="digestionParams">Parameters for RNA digestion.</param>
+        /// <param name="variableModifications">A list of variable modifications to consider.</param>
+        /// <returns>An enumerable collection of oligos with set modifications.</returns>
+        /// <remarks>
+        /// Code heavily borrowed from ProteolyticPeptide.GetModifiedPeptides
+        /// </remarks>
+        internal IEnumerable<OligoWithSetMods> GenerateModifiedOligos(IEnumerable<Modification> allKnownFixedMods,
             RnaDigestionParams digestionParams, List<Modification> variableModifications)
         {
             int oligoLength = OneBasedEndResidue - OneBasedStartResidue + 1;
@@ -79,7 +90,7 @@ internal IEnumerable<OligoWithSetMods> GetModifiedOligos(IEnumerable<Modificatio
                 }
             }
 
-            // LOCALIZED MODS
+            // collect all localized modifications from the database. 
             foreach (var kvp in NucleicAcid.OneBasedPossibleLocalizedModifications)
             {
                 bool inBounds = kvp.Key >= OneBasedStartResidue && kvp.Key <= OneBasedEndResidue;
@@ -127,6 +138,7 @@ internal IEnumerable<OligoWithSetMods> GetModifiedOligos(IEnumerable<Modificatio
 
             int variable_modification_isoforms = 0;
 
+            // Add the mods to the oligo by returning numerous OligoWithSetMods
             foreach (Dictionary<int, Modification> kvp in GetVariableModificationPatterns(twoBasedPossibleVariableAndLocalizeableModifications, maxModsForOligo, oligoLength))
             {
                 int numFixedMods = 0;
diff --git a/mzLib/Transcriptomics/Digestion/OligoWithSetMods.cs b/mzLib/Transcriptomics/Digestion/OligoWithSetMods.cs
index 966f97c50..f5e51c19c 100644
--- a/mzLib/Transcriptomics/Digestion/OligoWithSetMods.cs
+++ b/mzLib/Transcriptomics/Digestion/OligoWithSetMods.cs
@@ -9,6 +9,17 @@
 
 namespace Transcriptomics.Digestion
 {
+
+    /// <summary>
+    /// Represents an oligonucleotide with set modifications, providing properties and methods for
+    /// accessing and manipulating its chemical characteristics.
+    /// </summary>
+    /// <remarks>
+    /// The monoisotopic mass, most abundant mass, and chemical formula are calculated lazily: each is computed on access when its
+    /// backing field (_monoisotopicMass, _thisChemicalFormula, _mostAbundantMonoisotopicMass) is null, and the result is then cached.
+    /// This keeps the values consistent with the current state of the oligonucleotide and its modifications only if the cache is cleared.
+    /// Therefore, it is important to set those backing fields to null whenever a terminus or modification is changed.
+    /// </remarks>
     public class OligoWithSetMods : NucleolyticOligo, IBioPolymerWithSetMods, INucleicAcid
     {
         public OligoWithSetMods(NucleicAcid nucleicAcid, RnaDigestionParams digestionParams, int oneBaseStartResidue,
@@ -83,13 +94,10 @@ public double MonoisotopicMass
         {
             get
             {
-                if (_monoisotopicMass is null)
-                {
-                    _monoisotopicMass = BaseSequence.Sum(nuc => Nucleotide.GetResidue(nuc).MonoisotopicMass) +
-                                        AllModsOneIsNterminus.Values.Sum(mod => mod.MonoisotopicMass.Value) +
-                                        FivePrimeTerminus.MonoisotopicMass +
-                                        ThreePrimeTerminus.MonoisotopicMass;
-                }
+                _monoisotopicMass ??= BaseSequence.Sum(nuc => Nucleotide.GetResidue(nuc).MonoisotopicMass) +
+                                      AllModsOneIsNterminus.Values.Sum(mod => mod.MonoisotopicMass!.Value) +
+                                      FivePrimeTerminus.MonoisotopicMass +
+                                      ThreePrimeTerminus.MonoisotopicMass;
                 return _monoisotopicMass.Value;
             }
         }
@@ -98,20 +106,19 @@ public ChemicalFormula ThisChemicalFormula
         {
             get
             {
-                if (_thisChemicalFormula is null)
+                if (_thisChemicalFormula is not null) return _thisChemicalFormula!;
+
+                var fullFormula = new RNA(BaseSequence, FivePrimeTerminus, ThreePrimeTerminus).GetChemicalFormula();
+                foreach (var mod in AllModsOneIsNterminus.Values)
                 {
-                    var fullFormula = new RNA(BaseSequence, FivePrimeTerminus, ThreePrimeTerminus).GetChemicalFormula();
-                    foreach (var mod in AllModsOneIsNterminus.Values)
+                    if (mod.ChemicalFormula is null)
                     {
-                        if (mod.ChemicalFormula is null)
-                        {
-                            fullFormula = null;
-                            break;
-                        }
-                        fullFormula.Add(mod.ChemicalFormula);
+                        fullFormula = null;
+                        break;
                     }
-                    _thisChemicalFormula = fullFormula;
+                    fullFormula.Add(mod.ChemicalFormula);
                 }
+                _thisChemicalFormula = fullFormula;
                 return _thisChemicalFormula!;
             }
         }
@@ -120,13 +127,12 @@ public double MostAbundantMonoisotopicMass
         {
             get
             {
-                if (_mostAbundantMonoisotopicMass is null)
-                {
-                    var distribution = IsotopicDistribution.GetDistribution(ThisChemicalFormula);
-                    double maxIntensity = distribution.Intensities.Max();
-                    _mostAbundantMonoisotopicMass = distribution.Masses[distribution.Intensities.IndexOf(maxIntensity)].RoundedDouble();
-                }
-                return _mostAbundantMonoisotopicMass.Value;
+                if (_mostAbundantMonoisotopicMass is not null) return _mostAbundantMonoisotopicMass.Value;
+
+                var distribution = IsotopicDistribution.GetDistribution(ThisChemicalFormula);
+                double maxIntensity = distribution.Intensities.Max();
+                _mostAbundantMonoisotopicMass = distribution.Masses[distribution.Intensities.IndexOf(maxIntensity)].RoundedDouble();
+                return _mostAbundantMonoisotopicMass!.Value;
             }
         }
 
diff --git a/mzLib/Transcriptomics/NucleicAcid.cs b/mzLib/Transcriptomics/NucleicAcid.cs
index 5a42b7d67..db6f18f43 100644
--- a/mzLib/Transcriptomics/NucleicAcid.cs
+++ b/mzLib/Transcriptomics/NucleicAcid.cs
@@ -45,10 +45,6 @@ public abstract class NucleicAcid : INucleicAcid, IBioPolymer, IEquatable<Nuclei
         /// <summary>
         /// For creating an RNA programatically
         /// </summary>
-        /// <param name="sequence"></param>
-        /// <param name="fivePrimeTerm"></param>
-        /// <param name="threePrimeTerm"></param>
-        /// <param name="oneBasedPossibleLocalizedModifications"></param>
         protected NucleicAcid(string sequence, IHasChemicalFormula? fivePrimeTerm = null, IHasChemicalFormula? threePrimeTerm = null,
             IDictionary<int, List<Modification>>? oneBasedPossibleLocalizedModifications = null)
         {
@@ -59,23 +55,12 @@ protected NucleicAcid(string sequence, IHasChemicalFormula? fivePrimeTerm = null
             _oneBasedPossibleLocalizedModifications = oneBasedPossibleLocalizedModifications ?? new Dictionary<int, List<Modification>>();
             GeneNames = new List<Tuple<string, string>>();
 
-            ParseSequence(sequence);
+            ParseSequenceString(sequence);
         }
 
         /// <summary>
         /// For Reading in from rna database
         /// </summary>
-        /// <param name="sequence"></param>
-        /// <param name="name"></param>
-        /// <param name="identifier"></param>
-        /// <param name="organism"></param>
-        /// <param name="databaseFilePath"></param>
-        /// <param name="fivePrimeTerm"></param>
-        /// <param name="threePrimeTerm"></param>
-        /// <param name="oneBasedPossibleLocalizedModifications"></param>
-        /// <param name="isContaminant"></param>
-        /// <param name="isDecoy"></param>
-        /// <param name="additionalDatabaseFields"></param>
         protected NucleicAcid(string sequence, string name, string identifier, string organism, string databaseFilePath,
             IHasChemicalFormula? fivePrimeTerm = null, IHasChemicalFormula? threePrimeTerm = null,
             IDictionary<int, List<Modification>>? oneBasedPossibleLocalizedModifications = null,
@@ -120,7 +105,6 @@ protected NucleicAcid(string sequence, string name, string identifier, string or
 
         #endregion
 
-
         #region Public Properties
 
         /// <summary>
@@ -216,7 +200,7 @@ public IEnumerable<IBioPolymerWithSetMods> Digest(IDigestionParams digestionPara
                          digestionParams.MaxMissedCleavages, digestionParams.MinLength, digestionParams.MaxLength))
             {
                 // add fixed and variable mods to base sequence digestion products
-                foreach (var modifiedOligo in unmodifiedOligo.GetModifiedOligos(allKnownFixedMods, digestionParams,
+                foreach (var modifiedOligo in unmodifiedOligo.GenerateModifiedOligos(allKnownFixedMods, digestionParams,
                              variableModifications))
                 {
                     yield return modifiedOligo;
@@ -273,10 +257,10 @@ public ChemicalFormula GetChemicalFormula()
 
         #region Private Methods
 
-        private bool ReplaceTerminus(ref IHasChemicalFormula terminus, IHasChemicalFormula value)
+        private void ReplaceTerminus(ref IHasChemicalFormula? terminus, IHasChemicalFormula? value)
         {
             if (Equals(value, terminus))
-                return false;
+                return;
 
             if (terminus != null)
                 MonoisotopicMass -= terminus.MonoisotopicMass;
@@ -285,24 +269,20 @@ private bool ReplaceTerminus(ref IHasChemicalFormula terminus, IHasChemicalFormu
 
             if (value != null)
                 MonoisotopicMass += value.MonoisotopicMass;
-
-            return true;
         }
 
         /// <summary>
-        /// Parses a string sequence of nucleic acids characters into a peptide object
+        /// Parses a string sequence of nucleic acid characters into an array of Nucleotide objects,
+        /// updates the sequence string, and calculates the monoisotopic mass.
         /// </summary>
-        /// <param name="sequence"></param>
-        /// <returns></returns>
-        private bool ParseSequence(string sequence)
+        /// <param name="sequence">The string sequence of nucleic acid characters to parse.</param>
+        private void ParseSequenceString(string sequence)
         {
             if (string.IsNullOrEmpty(sequence))
-                return false;
+                return;
 
             int index = 0;
-
             double monoMass = 0;
-            ChemicalFormula chemFormula = new();
 
             StringBuilder sb = null;
             sb = new StringBuilder(sequence.Length);
@@ -337,8 +317,6 @@ private bool ParseSequence(string sequence)
             _sequence = sb.ToString();
             MonoisotopicMass += monoMass;
             Array.Resize(ref _nucleicAcids, Length);
-
-            return true;
         }
 
         #endregion
@@ -349,7 +327,8 @@ public bool Equals(NucleicAcid? other)
         {
             if (ReferenceEquals(null, other)) return false;
             if (ReferenceEquals(this, other)) return true;
-            return _5PrimeTerminus.Equals(other._5PrimeTerminus)
+            return _sequence == other._sequence
+                   && _5PrimeTerminus.Equals(other._5PrimeTerminus)
                    && _3PrimeTerminus.Equals(other._3PrimeTerminus);
         }
 

From 94d8bfad98ac50447fe231dbdbe11b12ca8d44fe Mon Sep 17 00:00:00 2001
From: Nic Bollis <nbollis@wisc.edu>
Date: Tue, 24 Sep 2024 14:52:08 -0500
Subject: [PATCH 07/17] Add RNA support: loading, parsing, and decoy generation

Introduced support for handling RNA data within the UsefulProteomicsDatabases project. Key changes include:

- Added `Transcriptomics\TestData` folder to `Test.csproj`.
- Changed access modifiers in `ProteinDbLoader.cs` to internal.
- Added `using` directives for `Transcriptomics` in `ProteinXmlEntry.cs`.
- Introduced methods `ParseRnaEndElement` and `ParseRnaEntryEndElement` in `ProteinXmlEntry.cs`.
- Modified `ParseAnnotatedMods` to check for RNA modifications.
- Added project reference to `Transcriptomics.csproj` in `UsefulProteomicsDatabases.csproj`.
- Added `ClassExtensions.cs` with `CreateNew` method for nucleic acids.
- Added `RnaDbLoader.cs` for RNA database loading.
- Added `RnaDecoyGenerator.cs` for generating decoy RNA sequences.
---
 mzLib/Test/Test.csproj                        |   1 +
 mzLib/Transcriptomics/ClassExtensions.cs      |  81 ++++++
 .../ProteinDbLoader.cs                        |   6 +-
 .../ProteinXmlEntry.cs                        |  58 ++++-
 .../Transcriptomics/RnaDbLoader.cs            | 245 ++++++++++++++++++
 .../Transcriptomics/RnaDecoyGenerator.cs      |  88 +++++++
 .../UsefulProteomicsDatabases.csproj          |   1 +
 7 files changed, 475 insertions(+), 5 deletions(-)
 create mode 100644 mzLib/Transcriptomics/ClassExtensions.cs
 create mode 100644 mzLib/UsefulProteomicsDatabases/Transcriptomics/RnaDbLoader.cs
 create mode 100644 mzLib/UsefulProteomicsDatabases/Transcriptomics/RnaDecoyGenerator.cs

diff --git a/mzLib/Test/Test.csproj b/mzLib/Test/Test.csproj
index b58d87522..a4cf4545d 100644
--- a/mzLib/Test/Test.csproj
+++ b/mzLib/Test/Test.csproj
@@ -513,6 +513,7 @@
 
   <ItemGroup>
     <Folder Include="TestData\" />
+    <Folder Include="Transcriptomics\TestData\" />
   </ItemGroup>
 
 </Project>
diff --git a/mzLib/Transcriptomics/ClassExtensions.cs b/mzLib/Transcriptomics/ClassExtensions.cs
new file mode 100644
index 000000000..a61c5c837
--- /dev/null
+++ b/mzLib/Transcriptomics/ClassExtensions.cs
@@ -0,0 +1,81 @@
+﻿using Omics.Modifications;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+using Transcriptomics.Digestion;
+
+namespace Transcriptomics
+{
+    public static class ClassExtensions
+    {
+        /// <summary>
+        /// Creates a new instance of a nucleic acid or oligo with set modifications, optionally updating its sequence, modifications, and decoy status.
+        /// </summary>
+        /// <typeparam name="T">The type of the nucleic acid, which must implement <see cref="INucleicAcid"/>.</typeparam>
+        /// <param name="target">The target nucleic acid or oligo with set modifications to base the new instance on.</param>
+        /// <param name="sequence">The new sequence string, if any. If null, the original sequence is used.</param>
+        /// <param name="modifications">A dictionary of modifications to apply, if any. If null, the original modifications are used.</param>
+        /// <param name="isDecoy">A flag indicating whether the sequence is a decoy, if any. If null, the original decoy status is used.</param>
+        /// <returns>A new instance of the specified nucleic acid type with the provided or existing properties.</returns>
+        /// <remarks>
+        /// This method facilitates the generation of new sequences for both nucleic acids and oligos with set modifications by allowing
+        /// optional updates to the sequence string, modifications, and decoy status. It ensures that the new instances are properly
+        /// initialized with the provided or existing properties, enabling further analysis of modified sequences and future generation of decoys on the fly.
+        /// </remarks>
+        public static T CreateNew<T>(this T target, string? sequence = null, IDictionary<int, List<Modification>>? modifications = null,
+        bool? isDecoy = null)
+            where T : INucleicAcid
+        {
+            // set new object parameters where not null
+            object? returnObj = null;
+            string newSequence = sequence ?? target.BaseSequence;
+            IDictionary<int, List<Modification>> newModifications = modifications ?? target.OneBasedPossibleLocalizedModifications;
+
+            switch (target)
+            {
+                case RNA rna:
+                {
+                    bool newIsDecoy = isDecoy ?? rna.IsDecoy;
+                    returnObj = new RNA(newSequence, rna.Name, rna.Accession, rna.Organism, rna.DatabaseFilePath,
+                        rna.FivePrimeTerminus, rna.ThreePrimeTerminus, newModifications, rna.IsContaminant, newIsDecoy, rna.AdditionalDatabaseFields);
+                    break;
+                }
+                case OligoWithSetMods oligo:
+                {
+                    var oldParent = oligo.Parent as RNA ?? throw new NullReferenceException();
+                    var newParent = new RNA(
+                        newSequence,
+                        oldParent.Name,
+                        oldParent.Accession,
+                        oldParent.Organism,
+                        oldParent.DatabaseFilePath,
+                        oldParent.FivePrimeTerminus,
+                        oldParent.ThreePrimeTerminus,
+                        newModifications,
+                        oldParent.IsContaminant,
+                        oldParent.IsDecoy,
+                        oldParent.AdditionalDatabaseFields);
+
+                    returnObj = new OligoWithSetMods(
+                        newParent,
+                        (oligo.DigestionParams as RnaDigestionParams)!,
+                        oligo.OneBasedStartResidue,
+                        oligo.OneBasedEndResidue,
+                        oligo.MissedCleavages,
+                        oligo.CleavageSpecificityForFdrCategory,
+                        newModifications.ToDictionary(p => p.Key, p => p.Value.First()),
+                        oligo.NumFixedMods,
+                        oligo.FivePrimeTerminus,
+                        oligo.ThreePrimeTerminus);
+                    break;
+                }
+                default:
+                    throw new ArgumentException("INucleicAcid type not yet implemented");
+            }
+
+            return (T)returnObj ?? throw new NullReferenceException("Error creating new INucleicAcid");
+        }
+    }
+}
diff --git a/mzLib/UsefulProteomicsDatabases/ProteinDbLoader.cs b/mzLib/UsefulProteomicsDatabases/ProteinDbLoader.cs
index 8544c2233..b5a680a5e 100644
--- a/mzLib/UsefulProteomicsDatabases/ProteinDbLoader.cs
+++ b/mzLib/UsefulProteomicsDatabases/ProteinDbLoader.cs
@@ -402,7 +402,7 @@ public static IEnumerable<Protein> MergeProteins(IEnumerable<Protein> mergeThese
             }
         }
 
-        private static string ApplyRegex(FastaHeaderFieldRegex regex, string line)
+        internal static string ApplyRegex(FastaHeaderFieldRegex regex, string line)
         {
             string result = null;
             if (regex != null)
@@ -416,7 +416,7 @@ private static string ApplyRegex(FastaHeaderFieldRegex regex, string line)
             return result;
         }
 
-        private static Dictionary<string, IList<Modification>> GetModificationDict(IEnumerable<Modification> mods)
+        internal static Dictionary<string, IList<Modification>> GetModificationDict(IEnumerable<Modification> mods)
         {
             var mod_dict = new Dictionary<string, IList<Modification>>();
 
@@ -436,7 +436,7 @@ private static Dictionary<string, IList<Modification>> GetModificationDict(IEnum
             return mod_dict;
         }
 
-        private static Dictionary<string, Modification> GetModificationDictWithMotifs(IEnumerable<Modification> mods)
+        internal static Dictionary<string, Modification> GetModificationDictWithMotifs(IEnumerable<Modification> mods)
         {
             var mod_dict = new Dictionary<string, Modification>();
 
diff --git a/mzLib/UsefulProteomicsDatabases/ProteinXmlEntry.cs b/mzLib/UsefulProteomicsDatabases/ProteinXmlEntry.cs
index a93c896e7..698a1c51c 100644
--- a/mzLib/UsefulProteomicsDatabases/ProteinXmlEntry.cs
+++ b/mzLib/UsefulProteomicsDatabases/ProteinXmlEntry.cs
@@ -5,6 +5,8 @@
 using System.Text.RegularExpressions;
 using System.Xml;
 using Omics.Modifications;
+using Transcriptomics;
+using UsefulProteomicsDatabases.Transcriptomics;
 
 namespace UsefulProteomicsDatabases
 {
@@ -182,6 +184,38 @@ public Protein ParseEndElement(XmlReader xml, IEnumerable<string> modTypesToExcl
             return protein;
         }
 
+        internal RNA ParseRnaEndElement(XmlReader xml, IEnumerable<string> modTypesToExclude,
+            Dictionary<string, Modification> unknownModifications,
+            bool isContaminant, string rnaDbLocation)
+        {
+            RNA result = null;
+            if (xml.Name == "feature")
+            {
+                ParseFeatureEndElement(xml, modTypesToExclude, unknownModifications);
+            }
+            if (xml.Name == "subfeature")
+            {
+                ParseSubFeatureEndElement(xml, modTypesToExclude, unknownModifications);
+            }
+            else if (xml.Name == "dbReference")
+            {
+                ParseDatabaseReferenceEndElement(xml);
+            }
+            else if (xml.Name == "gene")
+            {
+                ReadingGene = false;
+            }
+            else if (xml.Name == "organism")
+            {
+                ReadingOrganism = false;
+            }
+            else if (xml.Name == "entry")
+            {
+                result = ParseRnaEntryEndElement(xml, isContaminant, rnaDbLocation, modTypesToExclude, unknownModifications);
+            }
+            return result;
+        }
+
         /// <summary>
         /// Finish parsing an entry
         /// </summary>
@@ -202,6 +236,24 @@ public Protein ParseEntryEndElement(XmlReader xml, bool isContaminant, string pr
             return result;
         }
 
+        internal RNA ParseRnaEntryEndElement(XmlReader xml, bool isContaminant, string rnaDbLocation,
+            IEnumerable<string> modTypesToExclude, Dictionary<string, Modification> unknownModifications)
+        {
+            RNA result = null;
+            if (Accession != null && Sequence != null)
+            {
+                // Sanitize the sequence by replacing unexpected characters with 'X'; this reuses the protein (amino acid)
+                // sanitizer. Strange characters are sometimes added by RNA sequencing software, etc.
+                Sequence = ProteinDbLoader.SanitizeAminoAcidSequence(Sequence, 'X');
+
+                ParseAnnotatedMods(OneBasedModifications, modTypesToExclude, unknownModifications, AnnotatedMods);
+                result = new RNA(Sequence, Name, Accession, Organism, rnaDbLocation, null,
+                    null, OneBasedModifications, isContaminant, false, null);
+            }
+            Clear();
+            return result;
+        }
+
         /// <summary>
         /// Finish parsing a subfeature element
         /// </summary>
@@ -304,7 +356,8 @@ private static void ParseAnnotatedMods(Dictionary<int, List<Modification>> desti
                 string annotatedId = annotatedMod.Item2;
                 int annotatedModLocation = annotatedMod.Item1;
 
-                if (ProteinDbLoader.IdWithMotifToMod.TryGetValue(annotatedId, out Modification foundMod))
+                if (ProteinDbLoader.IdWithMotifToMod.TryGetValue(annotatedId, out Modification foundMod)
+                    || RnaDbLoader.IdWithMotifToMod.TryGetValue(annotatedId, out foundMod))
                 {
                     // if the list of known mods contains this IdWithMotif
                     if (!modTypesToExclude.Contains(foundMod.ModificationType))
@@ -322,7 +375,8 @@ private static void ParseAnnotatedMods(Dictionary<int, List<Modification>> desti
                 }
 
                 // no known mod - try looking it up in the dictionary of mods without motif appended
-                else if (ProteinDbLoader.IdToPossibleMods.TryGetValue(annotatedId, out IList<Modification> mods))
+                else if (ProteinDbLoader.IdToPossibleMods.TryGetValue(annotatedId, out IList<Modification> mods)
+                         || RnaDbLoader.IdToPossibleMods.TryGetValue(annotatedId, out mods))
                 {
                     foreach (Modification mod in mods)
                     {
diff --git a/mzLib/UsefulProteomicsDatabases/Transcriptomics/RnaDbLoader.cs b/mzLib/UsefulProteomicsDatabases/Transcriptomics/RnaDbLoader.cs
new file mode 100644
index 000000000..1828f7b4c
--- /dev/null
+++ b/mzLib/UsefulProteomicsDatabases/Transcriptomics/RnaDbLoader.cs
@@ -0,0 +1,245 @@
+﻿using Omics.Modifications;
+using System;
+using System.Collections.Generic;
+using System.IO.Compression;
+using System.IO;
+using System.Linq;
+using System.Text;
+using System.Text.RegularExpressions;
+using System.Threading.Tasks;
+using System.Xml;
+using Chemistry;
+using Transcriptomics;
+
+namespace UsefulProteomicsDatabases.Transcriptomics
+{
+    public enum RnaFastaHeaderType
+    {
+        Modomics,
+        Unknown,
+    }
+
+    public static class RnaDbLoader
+    {
+
+        public static readonly Dictionary<string, FastaHeaderFieldRegex> ModomicsFieldRegexes =
+            new Dictionary<string, FastaHeaderFieldRegex>()
+            {
+                { "Id", new FastaHeaderFieldRegex("Id", @"id:(?<id>.+?)\|", 0, 1) },
+                { "Name", new FastaHeaderFieldRegex("Name", @"Name:(?<Name>.+?)\|", 0, 1) },
+                { "SOterm", new FastaHeaderFieldRegex("SOterm", @"SOterm:(?<SOterm>.+?)\|", 0, 1) },
+                { "Type", new FastaHeaderFieldRegex("Type", @"Type:(?<Type>.+?)\|", 0, 1) },
+                { "Subtype", new FastaHeaderFieldRegex("Subtype", @"Subtype:(?<Subtype>.+?)\|", 0, 1) },
+                { "Feature", new FastaHeaderFieldRegex("Feature", @"Feature:(?<Feature>.+?)\|", 0, 1) },
+                { "Organism", new FastaHeaderFieldRegex("Organism", @"Species:(?<Species>.+?)$", 0, 1) },
+                { "Cellular Localization", new FastaHeaderFieldRegex("CellularLocalization", @"Cellular_Localization:(?<Cellular_Localization>.+?)\|", 0, 1) },
+            };
+
+
+        public static List<RNA> LoadRnaFasta(string rnaDbLocation, bool generateTargets, DecoyType decoyType,
+            bool isContaminant, out List<string> errors, IHasChemicalFormula? fivePrimeTerm = null, IHasChemicalFormula? threePrimeTerm = null)
+        {
+            RnaFastaHeaderType? headerType = null;
+            Regex substituteWhitespace = new Regex(@"\s+");
+            errors = new List<string>();
+            List<RNA> targets = new List<RNA>();
+            string identifierHeader = null;
+
+            string name = null;
+            string organism = null;
+            string identifier = null;
+
+            string newDbLocation = rnaDbLocation;
+
+            //we had trouble decompressing and streaming on the fly so we decompress completely first, then stream the file, then delete the decompressed file
+            if (rnaDbLocation.EndsWith(".gz"))
+            {
+                newDbLocation = Path.Combine(Path.GetDirectoryName(rnaDbLocation), "temp.fasta");
+                using var stream = new FileStream(rnaDbLocation, FileMode.Open, FileAccess.Read, FileShare.Read);
+                using FileStream outputFileStream = File.Create(newDbLocation);
+                using var decompressor = new GZipStream(stream, CompressionMode.Decompress);
+                decompressor.CopyTo(outputFileStream);
+            }
+
+            using (var fastaFileStream = new FileStream(newDbLocation, FileMode.Open, FileAccess.Read, FileShare.Read))
+            {
+                StringBuilder sb = null;
+                StreamReader fasta = new StreamReader(fastaFileStream);
+                Dictionary<string, string> regexResults = new();
+                Dictionary<string, FastaHeaderFieldRegex> regexes = null;
+
+                while (true)
+                {
+                    string line = "";
+                    line = fasta.ReadLine();
+                    if (line == null) { break; }
+
+                    if (line.StartsWith(">"))
+                    {
+                        if (headerType is null)
+                        {
+                            headerType = DetectFastaHeaderType(line);
+
+                            switch (headerType)
+                            {
+                                case RnaFastaHeaderType.Modomics:
+                                    regexes = ModomicsFieldRegexes;
+                                    identifierHeader = "SOterm";
+                                    break;
+
+                                case RnaFastaHeaderType.Unknown:
+                                case null:
+                                default:
+                                    throw new MzLibUtil.MzLibException("Unknown fasta header format: " + line);
+                            }
+                        }
+
+
+                        regexResults = ParseRegexFields(line, regexes);
+                        name = regexResults["Name"];
+                        regexResults.Remove("Name");
+                        organism = regexResults["Organism"];
+                        regexResults.Remove("Organism");
+                        identifier = regexResults[identifierHeader];
+                        regexResults.Remove(identifierHeader);
+
+                        sb = new StringBuilder();
+                    }
+                    else if (sb is not null)
+                    {
+                        sb.Append(line.Trim());
+                    }
+
+                    if ((fasta.Peek() == '>' || fasta.Peek() == -1) /*&& accession != null*/ && sb != null)
+                    {
+                        string sequence = substituteWhitespace.Replace(sb.ToString(), "");
+                        Dictionary<string, string> additonalDatabaseFields =
+                            regexResults.ToDictionary(x => x.Key, x => x.Value);
+
+                        // Do we need to sanitize the sequence? 
+
+                        RNA rna = new RNA(sequence, name, identifier, organism, rnaDbLocation,
+                            fivePrimeTerm, threePrimeTerm, null,
+                            isContaminant, false, additonalDatabaseFields);
+                        if (rna.Length == 0)
+                            errors.Add("Line" + line + ", Rna length of 0: " + rna.Name + "was skipped from database: " + rnaDbLocation);
+                        else
+                            targets.Add(rna);
+
+                        name = null;
+                        organism = null;
+                        identifier = null;
+                        regexResults.Clear();
+                    }
+
+                    // no input left
+                    if (fasta.Peek() == -1)
+                    {
+                        break;
+                    }
+                }
+            }
+
+            if (newDbLocation != rnaDbLocation)
+                File.Delete(newDbLocation);
+
+            if (!targets.Any())
+                errors.Add("No targets were loaded from database: " + rnaDbLocation);
+
+            List<RNA> decoys = RnaDecoyGenerator.GenerateDecoys(targets, decoyType);
+            return generateTargets ? targets.Concat(decoys).ToList() : decoys;
+        }
+
+        private static RnaFastaHeaderType DetectFastaHeaderType(string line)
+        {
+            if (!line.StartsWith(">"))
+                return RnaFastaHeaderType.Unknown;
+            
+
+            return RnaFastaHeaderType.Modomics;
+        }
+
+        private static Dictionary<string, string> ParseRegexFields(string line,
+            Dictionary<string, FastaHeaderFieldRegex> regexes)
+        {
+            Dictionary<string, string> fields = new Dictionary<string, string>();
+
+            foreach (var regex in regexes)
+            {
+                string match = ProteinDbLoader.ApplyRegex(regex.Value, line);
+                fields.Add(regex.Key, match);
+            }
+
+            return fields;
+        }
+
+
+        public static Dictionary<string, IList<Modification>> IdToPossibleMods = new Dictionary<string, IList<Modification>>();
+        public static Dictionary<string, Modification> IdWithMotifToMod = new Dictionary<string, Modification>();
+
+        public static List<RNA> LoadRnaXML(string rnaDbLocation, bool generateTargets, DecoyType decoyType,
+            bool isContaminant, IEnumerable<Modification> allKnownModifications,
+            IEnumerable<string> modTypesToExclude, out Dictionary<string, Modification> unknownModifications,
+            int maxThreads = 1, IHasChemicalFormula? fivePrimeTerm = null, IHasChemicalFormula? threePrimeTerm = null)
+        {
+            var prespecified = ProteinDbLoader.GetPtmListFromProteinXml(rnaDbLocation);
+            allKnownModifications = allKnownModifications ?? new List<Modification>();
+            modTypesToExclude = modTypesToExclude ?? new List<string>();
+
+            if (prespecified.Count > 0 || allKnownModifications.Count() > 0)
+            {
+                //modsDictionary = GetModificationDict(new HashSet<Modification>(prespecified.Concat(allKnownModifications)));
+                IdToPossibleMods = ProteinDbLoader.GetModificationDict(new HashSet<Modification>(prespecified.Concat(allKnownModifications)));
+                IdWithMotifToMod = ProteinDbLoader.GetModificationDictWithMotifs(new HashSet<Modification>(prespecified.Concat(allKnownModifications)));
+            }
+            List<RNA> targets = new List<RNA>();
+            unknownModifications = new Dictionary<string, Modification>();
+
+            string newProteinDbLocation = rnaDbLocation;
+
+            //we had trouble decompressing and streaming on the fly so we decompress completely first, then stream the file, then delete the decompressed file
+            if (rnaDbLocation.EndsWith(".gz"))
+            {
+                newProteinDbLocation = Path.Combine(Path.GetDirectoryName(rnaDbLocation), "temp.xml");
+                using var stream = new FileStream(rnaDbLocation, FileMode.Open, FileAccess.Read, FileShare.Read);
+                using FileStream outputFileStream = File.Create(newProteinDbLocation);
+                using var decompressor = new GZipStream(stream, CompressionMode.Decompress);
+                decompressor.CopyTo(outputFileStream);
+            }
+
+            using (var uniprotXmlFileStream = new FileStream(newProteinDbLocation, FileMode.Open, FileAccess.Read, FileShare.Read))
+            {
+                Regex substituteWhitespace = new Regex(@"\s+");
+
+                ProteinXmlEntry block = new ProteinXmlEntry();
+
+                using (XmlReader xml = XmlReader.Create(uniprotXmlFileStream))
+                {
+                    while (xml.Read())
+                    {
+                        if (xml.NodeType == XmlNodeType.Element)
+                        {
+                            block.ParseElement(xml.Name, xml);
+                        }
+                        if (xml.NodeType == XmlNodeType.EndElement || xml.IsEmptyElement)
+                        {
+                            RNA newProtein = block.ParseRnaEndElement(xml, modTypesToExclude, unknownModifications, isContaminant, rnaDbLocation);
+                            if (newProtein != null)
+                            {
+                                targets.Add(newProtein);
+                            }
+                        }
+                    }
+                }
+            }
+            if (newProteinDbLocation != rnaDbLocation)
+            {
+                File.Delete(newProteinDbLocation);
+            }
+
+            List<RNA> decoys = RnaDecoyGenerator.GenerateDecoys(targets, decoyType, maxThreads);
+            IEnumerable<RNA> proteinsToExpand = generateTargets ? targets.Concat(decoys) : decoys;
+            return proteinsToExpand.ToList();
+        }
+    }
+}
diff --git a/mzLib/UsefulProteomicsDatabases/Transcriptomics/RnaDecoyGenerator.cs b/mzLib/UsefulProteomicsDatabases/Transcriptomics/RnaDecoyGenerator.cs
new file mode 100644
index 000000000..6bd25e31f
--- /dev/null
+++ b/mzLib/UsefulProteomicsDatabases/Transcriptomics/RnaDecoyGenerator.cs
@@ -0,0 +1,88 @@
+﻿using Proteomics;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+using MassSpectrometry;
+using Omics.Modifications;
+using Transcriptomics;
+
+namespace UsefulProteomicsDatabases.Transcriptomics
+{
+    /// <summary>
+    /// Provides methods for generating decoy nucleic acids from any implementor of <see cref="INucleicAcid"/>.
+    /// </summary>
+    /// <remarks>
+    /// This class supports various types of decoy generation, including reversing, sliding, and shuffling sequences.
+    /// It allows for the creation of decoy sequences while preserving certain characteristics such as modification sites and termini.
+    /// The <c>GenerateDecoys</c> method serves as the main entry point, delegating to specific decoy generation methods based on the specified <see cref="DecoyType"/>.
+    /// TODO: Implement Shuffle and Slide Decoys
+    /// TODO: Consider passing digestion motif as optional parameter to leave digestion sites intact. Currently leaving the 3' intact as it is the predominant cleavage motif.
+    /// </remarks>
+    public static class RnaDecoyGenerator
+    {
+        /// <summary>
+        /// Generates decoys for <paramref name="nucleicAcids"/> using the strategy selected by <paramref name="decoyType"/>.
+        /// </summary>
+        /// <returns>Only the decoys, never the targets; an empty list for <see cref="DecoyType.None"/>.</returns>
+        /// <exception cref="NotImplementedException">Slide and Shuffle decoys are not implemented yet.</exception>
+        /// <exception cref="ArgumentOutOfRangeException">Any other decoy type, including Random.</exception>
+        public static List<T> GenerateDecoys<T>(List<T> nucleicAcids, DecoyType decoyType, int maxThreads = -1) where T : INucleicAcid
+        {
+            switch (decoyType)
+            {
+                case DecoyType.None:
+                    return new List<T>();
+                case DecoyType.Reverse:
+                    return GenerateReverseDecoys(nucleicAcids, maxThreads);
+                case DecoyType.Slide:
+                    return GenerateSlidedDecoys(nucleicAcids, maxThreads);
+                case DecoyType.Shuffle:
+                    return GenerateShuffledDecoys(nucleicAcids, maxThreads);
+                case DecoyType.Random:
+                default:
+                    throw new ArgumentOutOfRangeException(nameof(decoyType), decoyType, null);
+            }
+        }
+
+        /// <summary>
+        /// Generates decoys in which the sequence is reversed while the 3' terminal residue stays in place
+        /// (it is the most likely cleavage site); every modification follows its nucleotide of origin.
+        /// Decoys are returned in the same order as their targets, regardless of <paramref name="maxThreads"/>.
+        /// </summary>
+        /// <param name="nucleicAcids">Targets to build decoys from.</param>
+        /// <param name="maxThreads">Maximum degree of parallelism; -1 means no limit.</param>
+        private static List<T> GenerateReverseDecoys<T>(List<T> nucleicAcids, int maxThreads = -1) where T : INucleicAcid
+        {
+            // Each iteration writes only its own slot, so no lock is needed and target order is preserved.
+            var decoyNucleicAcids = new T[nucleicAcids.Count];
+            Parallel.For(0, nucleicAcids.Count, new ParallelOptions() { MaxDegreeOfParallelism = maxThreads }, i =>
+            {
+                T nucleicAcid = nucleicAcids[i];
+                // reverse all but the final residue; sequences shorter than two residues are left unchanged
+                char[] residues = nucleicAcid.BaseSequence.ToCharArray();
+                if (residues.Length > 1)
+                    Array.Reverse(residues, 0, residues.Length - 1);
+                var reverseSequence = new string(residues);
+
+                // move each modification with its residue: position p maps to Length - p, the last position is fixed
+                var reverseModifications = new Dictionary<int, List<Modification>>();
+                foreach (var kvp in nucleicAcid.OneBasedPossibleLocalizedModifications)
+                {
+                    var reverseKey = kvp.Key == reverseSequence.Length ? kvp.Key : reverseSequence.Length - kvp.Key;
+                    reverseModifications.Add(reverseKey, kvp.Value);
+                }
+
+                decoyNucleicAcids[i] = nucleicAcid.CreateNew(reverseSequence, reverseModifications, true);
+            });
+            return decoyNucleicAcids.ToList();
+        }
+
+        private static List<T> GenerateSlidedDecoys<T>(List<T> nucleicAcids, int maxThreads = -1) where T : INucleicAcid
+            => throw new NotImplementedException();
+
+        private static List<T> GenerateShuffledDecoys<T>(List<T> nucleicAcids, int maxThreads = -1) where T : INucleicAcid
+            => throw new NotImplementedException();
+    }
+}
diff --git a/mzLib/UsefulProteomicsDatabases/UsefulProteomicsDatabases.csproj b/mzLib/UsefulProteomicsDatabases/UsefulProteomicsDatabases.csproj
index d4e73fa42..f057fc396 100644
--- a/mzLib/UsefulProteomicsDatabases/UsefulProteomicsDatabases.csproj
+++ b/mzLib/UsefulProteomicsDatabases/UsefulProteomicsDatabases.csproj
@@ -19,6 +19,7 @@
     <ProjectReference Include="..\Chemistry\Chemistry.csproj" />
     <ProjectReference Include="..\MzLibUtil\MzLibUtil.csproj" />
     <ProjectReference Include="..\Proteomics\Proteomics.csproj" />
+    <ProjectReference Include="..\Transcriptomics\Transcriptomics.csproj" />
   </ItemGroup>
 
   <ItemGroup>

From c32ec7bb64f4e3e56da539a9c92fda61ab25aab5 Mon Sep 17 00:00:00 2001
From: Nic Bollis <nbollis@wisc.edu>
Date: Tue, 24 Sep 2024 15:34:04 -0500
Subject: [PATCH 08/17] Add new properties and caching to oligo digestion

Updated `using` directives in `TestDigestion.cs` and `OligoWithSetMods.cs` to include necessary namespaces. Added assertions in `TestDigestion.cs` for `SequenceWithChemicalFormulas` and `FullSequenceWithMassShift`. Changed `namespace` in `OligoWithSetMods.cs` to `Transcriptomics.Digestion`. Implemented and cached `SequenceWithChemicalFormulas` property in `OligoWithSetMods.cs`.
---
 mzLib/Test/Transcriptomics/TestDigestion.cs   |  6 +++
 .../Digestion/OligoWithSetMods.cs             | 38 ++++++++++++++++++-
 2 files changed, 42 insertions(+), 2 deletions(-)

diff --git a/mzLib/Test/Transcriptomics/TestDigestion.cs b/mzLib/Test/Transcriptomics/TestDigestion.cs
index d210b6a2e..0a9c526eb 100644
--- a/mzLib/Test/Transcriptomics/TestDigestion.cs
+++ b/mzLib/Test/Transcriptomics/TestDigestion.cs
@@ -6,6 +6,7 @@
 using Chemistry;
 using MassSpectrometry;
 using NUnit.Framework;
+using Omics;
 using Omics.Digestion;
 using Omics.Fragmentation;
 using Omics.Modifications;
@@ -368,7 +369,12 @@ public static void TestTermini_ThreePrimeCyclicPhosphate()
                 .Select(p => (OligoWithSetMods)p).ToList();
             Assert.That(digestionProducts.Count, Is.EqualTo(2));
             Assert.That(digestionProducts[0].FullSequence, Is.EqualTo("UAGUCGUUGAUAG"));
+            Assert.That(digestionProducts[0].SequenceWithChemicalFormulas, Is.EqualTo("UAGUCGUUGAUAG"));
+            Assert.That(digestionProducts[0].FullSequenceWithMassShift(), Is.EqualTo("UAGUCGUUGAUAG"));
+            
             Assert.That(digestionProducts[1].FullSequence, Is.EqualTo("UAGUCGUUGAUAG[Digestion Termini:Cyclic Phosphate on X]"));
+            Assert.That(digestionProducts[1].SequenceWithChemicalFormulas, Is.EqualTo("UAGUCGUUGAUAG[H-2O-1]"));
+            Assert.That(digestionProducts[1].FullSequenceWithMassShift(), Is.EqualTo("UAGUCGUUGAUAG[-18.010565]"));
 
             // top-down digestion, 3' oligo terminal modification
             variableMods = new List<Modification> { oligoCyclicPhosphate };
diff --git a/mzLib/Transcriptomics/Digestion/OligoWithSetMods.cs b/mzLib/Transcriptomics/Digestion/OligoWithSetMods.cs
index f5e51c19c..2ef0ae3f2 100644
--- a/mzLib/Transcriptomics/Digestion/OligoWithSetMods.cs
+++ b/mzLib/Transcriptomics/Digestion/OligoWithSetMods.cs
@@ -6,10 +6,10 @@
 using Omics;
 using Easy.Common.Extensions;
 using Omics.Fragmentation.Oligo;
+using System.Text;
 
 namespace Transcriptomics.Digestion
 {
-
     /// <summary>
     /// Represents an oligonucleotide with set modifications, providing properties and methods for
     /// accessing and manipulating its chemical characteristics.
@@ -63,6 +63,7 @@ public OligoWithSetMods(string sequence, Dictionary<string, Modification> allKno
         private ChemicalFormula? _thisChemicalFormula;
         private double? _mostAbundantMonoisotopicMass;
         private IDictionary<int, List<Modification>>? _oneBasedPossibleLocalizedModifications;
+        private string? _sequenceWithChemicalFormula;
 
         public string FullSequence { get; private set; }
         public IDigestionParams DigestionParams => _digestionParams;
@@ -136,7 +137,40 @@ public double MostAbundantMonoisotopicMass
             }
         }
 
-        public string SequenceWithChemicalFormulas => throw new NotImplementedException();
+        public string SequenceWithChemicalFormulas
+        {
+            get
+            {
+                // Built once on first access, then served from the backing field.
+                if (_sequenceWithChemicalFormula is null)
+                {
+                    var builder = new StringBuilder();
+
+                    // Appends "[formula]" when a non-null modification is stored under the given key.
+                    void AppendFormulaAt(int key)
+                    {
+                        if (AllModsOneIsNterminus.TryGetValue(key, out Modification? found) && found is not null)
+                            builder.Append('[' + found.ChemicalFormula.Formula + ']');
+                    }
+
+                    // key 1: modification on the leading terminus (the 5' end of an oligo)
+                    AppendFormulaAt(1);
+
+                    // keys 2 .. Length + 1: each residue, followed by the formula of its modification, if any
+                    for (int index = 0; index < Length; index++)
+                    {
+                        builder.Append(this[index]);
+                        AppendFormulaAt(index + 2);
+                    }
+
+                    // key Length + 2: modification on the trailing terminus (the 3' end of an oligo)
+                    AppendFormulaAt(Length + 2);
+
+                    _sequenceWithChemicalFormula = builder.ToString();
+                }
+                return _sequenceWithChemicalFormula;
+            }
+        }
 
         public Dictionary<int, Modification> AllModsOneIsNterminus => _allModsOneIsNterminus;
 

From c29b02380ce0113808679e2d826ca8ecae2dc265 Mon Sep 17 00:00:00 2001
From: Nic Bollis <nbollis@wisc.edu>
Date: Tue, 24 Sep 2024 16:44:23 -0500
Subject: [PATCH 09/17] Add RNA sequence and database handling and related test
 cases

- Added new files `ModomicsUnmodifiedTrimmed.fasta` and `ModomicsUnmodifiedTrimmed.fasta.gz` to `Test.csproj` with `CopyToOutputDirectory` set to `PreserveNewest`.
- Removed the `Transcriptomics\TestData` folder from `Test.csproj`.
- Introduced `Transcribe` method in `ClassExtensions.cs` for DNA to RNA transcription.
- Added summary comment to `NucleolyticOligo` class in `NucleolyticOligo.cs`.
- Added `ApplyRegex` method in `FastaHeaderFieldRegex.cs`.
- Introduced `ProteinDbWriter` class in `ProteinDbWriter.cs` for writing protein and nucleic acid databases.
- Modified `GetModsForThisProtein` to `GetModsForThisBioPolymer` in `ProteinDbWriter.cs`.
- Added `RnaDbLoader` class in `RnaDbLoader.cs` for RNA FASTA header detection and sequence loading.
- Updated user dictionary in `mzLib.sln.DotSettings` with new terms.
- Added test cases in `TestDbLoader.cs` for RNA database loading and header detection.
- Introduced `TestDecoyGeneration` class in `TestDecoyGenerator.cs` for RNA decoy generation tests.
- Added RNA sequence file `ModomicsUnmodifiedTrimmed.fasta` and its compressed version.
---
 mzLib/Test/Test.csproj                        |   7 +-
 .../TestData/ModomicsUnmodifiedTrimmed.fasta  |  10 +
 .../ModomicsUnmodifiedTrimmed.fasta.gz        | Bin 0 -> 369 bytes
 mzLib/Test/Transcriptomics/TestDbLoader.cs    | 149 ++++++++++
 .../Transcriptomics/TestDecoyGenerator.cs     | 227 ++++++++++++++
 mzLib/Transcriptomics/ClassExtensions.cs      |  40 +++
 .../Digestion/NucleolyticOligo.cs             |   3 +
 .../FastaHeaderFieldRegex.cs                  |  12 +
 .../ProteinDbWriter.cs                        | 280 +++++++++++++++++-
 .../Transcriptomics/RnaDbLoader.cs            |  48 ++-
 mzLib/mzLib.sln.DotSettings                   |   2 +
 11 files changed, 757 insertions(+), 21 deletions(-)
 create mode 100644 mzLib/Test/Transcriptomics/TestData/ModomicsUnmodifiedTrimmed.fasta
 create mode 100644 mzLib/Test/Transcriptomics/TestData/ModomicsUnmodifiedTrimmed.fasta.gz
 create mode 100644 mzLib/Test/Transcriptomics/TestDbLoader.cs
 create mode 100644 mzLib/Test/Transcriptomics/TestDecoyGenerator.cs

diff --git a/mzLib/Test/Test.csproj b/mzLib/Test/Test.csproj
index a4cf4545d..76ab4ac9b 100644
--- a/mzLib/Test/Test.csproj
+++ b/mzLib/Test/Test.csproj
@@ -494,6 +494,12 @@
     </None>
     <None Update="FileReadingTests\SearchResults\VariantCrossTest.psmtsv">
       <CopyToOutputDirectory>Always</CopyToOutputDirectory>
+    </None>
+    <None Update="Transcriptomics\TestData\ModomicsUnmodifiedTrimmed.fasta">
+      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+    </None>
+    <None Update="Transcriptomics\TestData\ModomicsUnmodifiedTrimmed.fasta.gz">
+      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
     </None>
 	<None Update="DataFiles\centroid_1x_MS1_4x_autoMS2.d\**">
 		  <CopyToOutputDirectory>Always</CopyToOutputDirectory>
@@ -513,7 +519,6 @@
 
   <ItemGroup>
     <Folder Include="TestData\" />
-    <Folder Include="Transcriptomics\TestData\" />
   </ItemGroup>
 
 </Project>
diff --git a/mzLib/Test/Transcriptomics/TestData/ModomicsUnmodifiedTrimmed.fasta b/mzLib/Test/Transcriptomics/TestData/ModomicsUnmodifiedTrimmed.fasta
new file mode 100644
index 000000000..18802a82a
--- /dev/null
+++ b/mzLib/Test/Transcriptomics/TestData/ModomicsUnmodifiedTrimmed.fasta
@@ -0,0 +1,10 @@
+>id:1|Name:tdbR00000010|SOterm:SO:0000254|Type:tRNA|Subtype:Ala|Feature:VGC|Cellular_Localization:prokaryotic cytosol|Species:Escherichia coli
+GGGGCUAUAGCUCAGCUGGGAGAGCGCCUGCUUUGCACGCAGGAGGUCUGCGGUUCGAUCCCGCAUAGCUCCACCA
+>id:2|Name:tdbR00000008|SOterm:SO:0000254|Type:tRNA|Subtype:Ala|Feature:GGC|Cellular_Localization:prokaryotic cytosol|Species:Escherichia coli
+GGGGCUAUAGCUCAGCUGGGAGAGCGCUUGCAUGGCAUGCAAGAGGUCAGCGGUUCGAUCCCGCUUAGCUCCACCA
+>id:3|Name:tdbR00000356|SOterm:SO:0001036|Type:tRNA|Subtype:Arg|Feature:ICG|Cellular_Localization:prokaryotic cytosol|Species:Escherichia coli
+GCAUCCGUAGCUCAGCUGGAUAGAGUACUCGGCUACGAACCGAGCGGUCGGAGGUUCGAAUCCUCCCGGAUGCACCA
+>id:4|Name:tdbR00000359|SOterm:SO:0001036|Type:tRNA|Subtype:Arg|Feature:CCG|Cellular_Localization:prokaryotic cytosol|Species:Escherichia coli
+GCGCCCGUAGCUCAGCUGGAUAGAGCGCUGCCCUCCGGAGGCAGAGGUCUCAGGUUCGAAUCCUGUCGGGCGCGCCA
+>id:5|Name:tdbR00000358|SOterm:SO:0001036|Type:tRNA|Subtype:Arg|Feature:UCU|Cellular_Localization:prokaryotic cytosol|Species:Escherichia coli
+GCGCCCUUAGCUCAGUUGGAUAGAGCAACGACCUUCUAAGUCGUGGGCCGCAGGUUCGAAUCCUGCAGGGCGCGCCA
diff --git a/mzLib/Test/Transcriptomics/TestData/ModomicsUnmodifiedTrimmed.fasta.gz b/mzLib/Test/Transcriptomics/TestData/ModomicsUnmodifiedTrimmed.fasta.gz
new file mode 100644
index 0000000000000000000000000000000000000000..11ab87ef2427bfc92c81c872815097a5e770d643
GIT binary patch
literal 369
zcmV-%0gnD3iwFo?vchEq08MXXZ*6H~b5(9_Z)9m^X=P+oa%pXCWn?a9VRLk00JYN1
zYQr!L0N{I}@6bm`f9ZS}2(g*LXrY;R9j<FuGj}eod+6}PPfGfqT?$<iD@eA9Ka@we
z5Zye{D=X4sJ}auLTjX+@Y+Q+QD!9-a4)WV>O>1To$mMpPI03Kov(;%Et$3H1(R#o2
zI)1zcqrLml$%SvRj^RtkT}aM!%r1pZ@VQ)D<LpK}ZOqa}XO>QPOz^JVl60X01r(73
z<v=$|L`A8h07!$&QgMx53MD8+s_%Cq(9>?Qu6L~~#y9Ka|LZCk<rHW@t%EC_YUuh`
zy8du<sp}T~(O<gwdDOk2ypN8~PNzKAF%_sng-T8mpe4172JCsfk%_!Rp790^-owp2
zyc<tqgLmj2O;NQLVck8-FL)K07t14W*x-$C=FxZV$WsTNI`M$p##7WlU<LoSWWD57
P>52CXQGF&lSp)z8*Q2z4

literal 0
HcmV?d00001

diff --git a/mzLib/Test/Transcriptomics/TestDbLoader.cs b/mzLib/Test/Transcriptomics/TestDbLoader.cs
new file mode 100644
index 000000000..6e1725b8f
--- /dev/null
+++ b/mzLib/Test/Transcriptomics/TestDbLoader.cs
@@ -0,0 +1,149 @@
+﻿using NUnit.Framework;
+using Omics.Modifications;
+using System;
+using System.Collections.Generic;
+using System.Diagnostics.CodeAnalysis;
+using System.IO;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+using UsefulProteomicsDatabases.Transcriptomics;
+using UsefulProteomicsDatabases;
+using Transcriptomics;
+
+namespace Test.Transcriptomics
+{
+    [TestFixture]
+    [ExcludeFromCodeCoverage]
+    internal class TestDbLoader
+    {
+        /// <summary>Path of the unmodified Modomics test FASTA under the test run directory.</summary>
+        public static string ModomicsUnmodifedFastaPath => Path.Combine(TestContext.CurrentContext.TestDirectory, "Transcriptomics/TestData/ModomicsUnmodifiedTrimmed.fasta");
+
+        /// <summary>Database paths paired with the header type expected to be detected for each of them.</summary>
+        private static IEnumerable<(string, RnaFastaHeaderType)> DetectHeaderTestCases
+        {
+            get
+            {
+                string testDirectory = TestContext.CurrentContext.TestDirectory;
+                yield return (Path.Combine(testDirectory, "DoubleProtease.tsv"), RnaFastaHeaderType.Unknown);
+                yield return (ModomicsUnmodifedFastaPath, RnaFastaHeaderType.Modomics);
+                yield return (Path.Combine(testDirectory, "Transcriptomics/TestData/ModomicsUnmodifiedTrimmed.fasta"), RnaFastaHeaderType.Modomics);
+            }
+        }
+
+        /// <summary>
+        /// Verifies that the header type detected from a database's header line matches the expected type.
+        /// </summary>
+        /// <param name="testData">Database path and the header type expected for it.</param>
+        [Test]
+        [TestCaseSource(nameof(DetectHeaderTestCases))]
+        public static void TestDetectHeaderType((string dbPath, RnaFastaHeaderType headerType) testData)
+        {
+            // Read at most two lines in a single pass; a first line starting with a digit is skipped.
+            string[] leadingLines = File.ReadLines(testData.dbPath).Take(2).ToArray();
+            string line = char.IsDigit(leadingLines[0].First()) ? leadingLines[1] : leadingLines[0];
+
+            // NUnit expects the actual value first, so failure messages report expected/actual correctly.
+            var type = RnaDbLoader.DetectRnaFastaHeaderType(line);
+            Assert.That(type, Is.EqualTo(testData.headerType));
+        }
+
+
+        [Test]
+        [TestCase("ModomicsUnmodifiedTrimmed.fasta")]
+        [TestCase("ModomicsUnmodifiedTrimmed.fasta.gz")]
+        public static void TestModomicsUnmodifiedFasta(string databaseFileName)
+        {
+            var dbPath = Path.Combine(TestContext.CurrentContext.TestDirectory, "Transcriptomics", "TestData", databaseFileName);
+            var oligos = RnaDbLoader.LoadRnaFasta(dbPath, true, DecoyType.None, false, out var errors);
+            Assert.That(errors.Count, Is.EqualTo(0));
+            Assert.That(oligos.Count, Is.EqualTo(5));
+            // every remaining assertion inspects the first loaded entry
+            var firstOligo = oligos.First();
+            Assert.That(firstOligo.BaseSequence, Is.EqualTo("GGGGCUAUAGCUCAGCUGGGAGAGCGCCUGCUUUGCACGCAGGAGGUCUGCGGUUCGAUCCCGCAUAGCUCCACCA"));
+            Assert.That(firstOligo.Name, Is.EqualTo("tdbR00000010"));
+            Assert.That(firstOligo.Accession, Is.EqualTo("SO:0000254"));
+            Assert.That(firstOligo.Organism, Is.EqualTo("Escherichia coli"));
+            Assert.That(firstOligo.DatabaseFilePath, Is.EqualTo(dbPath));
+            Assert.That(firstOligo.IsContaminant, Is.False);
+            Assert.That(firstOligo.IsDecoy, Is.False);
+            var extraFields = firstOligo.AdditionalDatabaseFields!;
+            Assert.That(extraFields.Count, Is.EqualTo(5));
+            Assert.That(extraFields["Id"], Is.EqualTo("1"));
+            Assert.That(extraFields["Type"], Is.EqualTo("tRNA"));
+            Assert.That(extraFields["Subtype"], Is.EqualTo("Ala"));
+            Assert.That(extraFields["Feature"], Is.EqualTo("VGC"));
+            Assert.That(extraFields["Cellular Localization"], Is.EqualTo("prokaryotic cytosol"));
+        }
+
+        [Test]
+        public static void TestContaminantFollowsThrough()
+        {
+            const string expectedFirstSequence = "GGGGCUAUAGCUCAGCUGGGAGAGCGCCUGCUUUGCACGCAGGAGGUCUGCGGUUCGAUCCCGCAUAGCUCCACCA";
+            var oligos = RnaDbLoader.LoadRnaFasta(ModomicsUnmodifedFastaPath, true, DecoyType.None, true, out var errors);
+
+            Assert.That(errors.Count, Is.EqualTo(0));
+            Assert.That(oligos.Count, Is.EqualTo(5));
+            Assert.That(oligos.First().BaseSequence, Is.EqualTo(expectedFirstSequence));
+            Assert.That(oligos.All(p => p.IsContaminant));
+            Assert.That(oligos.All(p => !p.IsDecoy));
+        }
+
+        [Test]
+        public static void TestNotGeneratingTargetsOrDecoys()
+        {
+            // targets are switched off and DecoyType.None is requested, so the result is expected to be empty
+            var oligos = RnaDbLoader.LoadRnaFasta(ModomicsUnmodifedFastaPath, false, DecoyType.None, true, out var errors);
+            Assert.That(errors.Count, Is.EqualTo(0));
+            Assert.That(oligos.Count, Is.EqualTo(0));
+        }
+
+        [Test]
+        public static void TestXmlWriterReader()
+        {
+            var rna = RnaDbLoader.LoadRnaFasta(ModomicsUnmodifedFastaPath, true, DecoyType.None, false, out var errors);
+            Assert.That(errors.Count, Is.EqualTo(0));
+
+            // a methylation targeting G, parsed from its PTM-list text definition
+            var modString = "ID   Methylation\r\nMT   Biological\r\nPP   Anywhere.\r\nTG   G\r\nCF   C1H2\r\n" + @"//";
+            var methylG = PtmListLoader.ReadModsFromString(modString, out List<(Modification, string)> modsOut).First();
+
+            // attach that modification to positions 1 and 3 of accession SO:0000254
+            var mods = new Dictionary<string, HashSet<Tuple<int, Modification>>>
+            {
+                ["SO:0000254"] = new HashSet<Tuple<int, Modification>> { new Tuple<int, Modification>(1, methylG), new Tuple<int, Modification>(3, methylG) },
+            };
+
+            // write the database as XML, then load it back
+            string outpath = Path.Combine(TestContext.CurrentContext.TestDirectory, "Transcriptomics/TestData/ModomicsUnmodifiedTrimmed.xml");
+            var xml = ProteinDbWriter.WriteXmlDatabase(mods, rna, outpath);
+            var temp = RnaDbLoader.LoadRnaXML(outpath, true, DecoyType.None, false, new List<Modification>() { methylG }, new List<string>(), out var unknownMods);
+
+            Assert.That(unknownMods.Count, Is.EqualTo(0));
+            Assert.That(temp.Count, Is.EqualTo(5));
+
+            var loadedMods = temp.First().OneBasedPossibleLocalizedModifications;
+            Assert.That(loadedMods.Count, Is.EqualTo(2));
+            foreach (int position in new[] { 1, 3 })
+            {
+                Assert.That(loadedMods[position].Count, Is.EqualTo(1));
+                Assert.That(loadedMods[position].First().IdWithMotif, Is.EqualTo(methylG.IdWithMotif));
+            }
+        }
+
+        [Test]
+        [TestCase("ATCG", "AUCG", true)]
+        [TestCase("ATCG", "UAGC", false)]
+        [TestCase("ATCGZ", "AUCGZ", true)]
+        [TestCase("ATCGZ", "UAGCZ", false)]
+        [TestCase("ATCGACGAATCACGATCAGTCATGCATTGCTAACT", "AUCGACGAAUCACGAUCAGUCAUGCAUUGCUAACU", true)]
+        [TestCase("ATCGACGAATCACGATCAGTCATGCATTGCTAACT", "UAGCUGCUUAGUGCUAGUCAGUACGUAACGAUUGA", false)]
+        [TestCase("ATCGACGAATCACGATCAGTCATGCATTGCTAACTATCGACGAATCACGATCAGTCATGCATTGCTAACTATCGACGAATCACGATCAGTCATGCATTGCTAACTATCGACGAATCACGATCAGTCATGCATTGCTAACTATCGACGAATCACGATCAGTCATGCATTGCTAACTATCGACGAATCACGATCAGTCATGCATTGCTAACT", "AUCGACGAAUCACGAUCAGUCAUGCAUUGCUAACUAUCGACGAAUCACGAUCAGUCAUGCAUUGCUAACUAUCGACGAAUCACGAUCAGUCAUGCAUUGCUAACUAUCGACGAAUCACGAUCAGUCAUGCAUUGCUAACUAUCGACGAAUCACGAUCAGUCAUGCAUUGCUAACUAUCGACGAAUCACGAUCAGUCAUGCAUUGCUAACU", true)]
+        [TestCase("ATCGACGAATCACGATCAGTCATGCATTGCTAACTATCGACGAATCACGATCAGTCATGCATTGCTAACTATCGACGAATCACGATCAGTCATGCATTGCTAACTATCGACGAATCACGATCAGTCATGCATTGCTAACTATCGACGAATCACGATCAGTCATGCATTGCTAACTATCGACGAATCACGATCAGTCATGCATTGCTAACT", "UAGCUGCUUAGUGCUAGUCAGUACGUAACGAUUGAUAGCUGCUUAGUGCUAGUCAGUACGUAACGAUUGAUAGCUGCUUAGUGCUAGUCAGUACGUAACGAUUGAUAGCUGCUUAGUGCUAGUCAGUACGUAACGAUUGAUAGCUGCUUAGUGCUAGUCAGUACGUAACGAUUGAUAGCUGCUUAGUGCUAGUCAGUACGUAACGAUUGA", false)]
+        public static void TestTranscribe(string input, string expected, bool isCodingStrand)
+            => Assert.That(
+                input.Transcribe(isCodingStrand),
+                Is.EqualTo(expected));
+    }
+}
diff --git a/mzLib/Test/Transcriptomics/TestDecoyGenerator.cs b/mzLib/Test/Transcriptomics/TestDecoyGenerator.cs
new file mode 100644
index 000000000..ea5b2c22d
--- /dev/null
+++ b/mzLib/Test/Transcriptomics/TestDecoyGenerator.cs
@@ -0,0 +1,227 @@
+﻿using NUnit.Framework;
+using Omics.Modifications;
+using System;
+using System.Collections.Generic;
+using System.Diagnostics.CodeAnalysis;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+using NUnit.Framework.Interfaces;
+using Transcriptomics;
+using UsefulProteomicsDatabases.Transcriptomics;
+using UsefulProteomicsDatabases;
+
+namespace Test.Transcriptomics
+{
+    [TestFixture]
+    [ExcludeFromCodeCoverage]
+    internal class TestDecoyGeneration
+    {
+        public static string ModomicsUnmodifiedFastaPath => TestDbLoader.ModomicsUnmodifedFastaPath; // alias; the TestDbLoader property name is spelled "Unmodifed"
+
+        [Test]
+        public static void TestReverseDecoy_Simple()
+        {
+            var oligos = new List<RNA> { new RNA("GUUCUG"), new RNA("GUGCUA") };
+            var decoys = RnaDecoyGenerator.GenerateDecoys(oligos, DecoyType.Reverse, 1);
+            // every residue except the last one is reversed
+            Assert.That(decoys.Count, Is.EqualTo(2));
+            Assert.That(decoys[0].BaseSequence, Is.EqualTo("UCUUGG"));
+            Assert.That(decoys[1].BaseSequence, Is.EqualTo("UCGUGA"));
+
+            var example = oligos.First();
+            foreach (var decoy in decoys)
+            {
+                Assert.That(!decoy.IsContaminant);
+                Assert.That(decoy.IsDecoy);
+                Assert.That(decoy.DatabaseFilePath == example.DatabaseFilePath);
+                Assert.That(decoy.Organism == example.Organism);
+                Assert.That(decoy.AdditionalDatabaseFields == example.AdditionalDatabaseFields);
+                Assert.That(decoy.Accession == example.Accession);
+                Assert.That(decoy.Name == example.Name);
+                Assert.That(decoy.Length == example.Length);
+                Assert.That(Equals(decoy.FivePrimeTerminus, example.FivePrimeTerminus));
+                Assert.That(Equals(decoy.ThreePrimeTerminus, example.ThreePrimeTerminus));
+                Assert.That(decoy.OneBasedPossibleLocalizedModifications.Count == example.OneBasedPossibleLocalizedModifications.Count);
+            }
+        }
+
+        [Test]
+        [TestCase("GUACUG", 1, "UCAUGG", 5)]
+        [TestCase("GUACUA", 2, "UCAUGA", 4)]
+        [TestCase("GUACUA", 3, "UCAUGA", 3)]
+        [TestCase("GUACUA", 4, "UCAUGA", 2)]
+        [TestCase("GUCCAA", 5, "ACCUGA", 1)]
+        [TestCase("GUUCUA", 6, "UCUUGA", 6)]
+        public static void TestReverseDecoy_SimpleWithMods(string rnaSequence, int modPosition, string expectedDecoySequence, int expectedDecoyModPosition)
+        {
+            var mod = new Modification();
+            var modsByPosition = new Dictionary<int, List<Modification>>() { { modPosition, new List<Modification>() { mod } } };
+            var originalRna = new RNA(rnaSequence, null, null, modsByPosition);
+            var oligos = new List<RNA>() { originalRna };
+
+            var decoys = RnaDecoyGenerator.GenerateDecoys(oligos, DecoyType.Reverse, 1);
+            Assert.That(decoys.Count, Is.EqualTo(1));
+            var decoy = decoys.First();
+
+            // both the sequence and the modification site must follow the reversal
+            Assert.That(decoy.BaseSequence, Is.EqualTo(expectedDecoySequence));
+            var decoyMods = decoy.OneBasedPossibleLocalizedModifications;
+            Assert.That(decoyMods.Count, Is.EqualTo(1));
+            Assert.That(decoyMods.First().Key, Is.EqualTo(expectedDecoyModPosition));
+            Assert.That(decoyMods.First().Value.Count, Is.EqualTo(1));
+            Assert.That(decoyMods.First().Value.First(), Is.EqualTo(mod));
+            // everything else is carried over from the target, apart from the decoy flag
+            Assert.That(decoy.Name, Is.EqualTo(originalRna.Name));
+            Assert.That(decoy.Accession, Is.EqualTo(originalRna.Accession));
+            Assert.That(decoy.Organism, Is.EqualTo(originalRna.Organism));
+            Assert.That(decoy.DatabaseFilePath, Is.EqualTo(originalRna.DatabaseFilePath));
+            Assert.That(decoy.IsContaminant, Is.EqualTo(originalRna.IsContaminant));
+            Assert.That(decoy.IsDecoy, Is.True);
+            Assert.That(decoy.AdditionalDatabaseFields, Is.EqualTo(originalRna.AdditionalDatabaseFields));
+            Assert.That(decoy.FivePrimeTerminus, Is.EqualTo(originalRna.FivePrimeTerminus));
+            Assert.That(decoy.ThreePrimeTerminus, Is.EqualTo(originalRna.ThreePrimeTerminus));
+        }
+
+        [Test]
+        public void TestReverseDecoy_FromDatabase()
+        {
+            const int numSequences = 5;
+            // expected decoy sequence for each database entry, keyed by entry name
+            var expectedSequences = new Dictionary<string, string>
+            {
+                ["tdbR00000010"] = "CCACCUCGAUACGCCCUAGCUUGGCGUCUGGAGGACGCACGUUUCGUCCGCGAGAGGGUCGACUCGAUAUCGGGGA",
+                ["tdbR00000008"] = "CCACCUCGAUUCGCCCUAGCUUGGCGACUGGAGAACGUACGGUACGUUCGCGAGAGGGUCGACUCGAUAUCGGGGA",
+                ["tdbR00000356"] = "CCACGUAGGCCCUCCUAAGCUUGGAGGCUGGCGAGCCAAGCAUCGGCUCAUGAGAUAGGUCGACUCGAUGCCUACGA",
+                ["tdbR00000359"] = "CCGCGCGGGCUGUCCUAAGCUUGGACUCUGGAGACGGAGGCCUCCCGUCGCGAGAUAGGUCGACUCGAUGCCCGCGA",
+                ["tdbR00000358"] = "CCGCGCGGGACGUCCUAAGCUUGGACGCCGGGUGCUGAAUCUUCCAGCAACGAGAUAGGUUGACUCGAUUCCCGCGA",
+            };
+
+            var oligos = RnaDbLoader.LoadRnaFasta(ModomicsUnmodifiedFastaPath, true, DecoyType.Reverse, false, out var errors);
+            Assert.That(errors.Count, Is.EqualTo(0));
+            Assert.That(oligos.Count, Is.EqualTo(numSequences * 2));
+            Assert.That(oligos.Count(p => p.IsDecoy), Is.EqualTo(numSequences));
+            Assert.That(oligos.Count(p => !p.IsDecoy), Is.EqualTo(numSequences));
+
+            // a target and its decoy share a name, so grouping by name pairs them up
+            foreach (var pair in oligos.GroupBy(p => p.Name))
+            {
+                Assert.That(pair.Count(), Is.EqualTo(2));
+                var target = pair.First(p => !p.IsDecoy);
+                var decoy = pair.First(p => p.IsDecoy);
+
+                Assert.That(target.FivePrimeTerminus, Is.EqualTo(decoy.FivePrimeTerminus));
+                Assert.That(target.ThreePrimeTerminus, Is.EqualTo(decoy.ThreePrimeTerminus));
+                Assert.That(target.AdditionalDatabaseFields, Is.EqualTo(decoy.AdditionalDatabaseFields));
+                Assert.That(target.IsContaminant, Is.EqualTo(decoy.IsContaminant));
+                Assert.That(target.DatabaseFilePath, Is.EqualTo(decoy.DatabaseFilePath));
+                Assert.That(target.DatabaseFilePath, Is.EqualTo(ModomicsUnmodifiedFastaPath));
+                Assert.That(target.Organism, Is.EqualTo(decoy.Organism));
+                Assert.That(target.Accession, Is.EqualTo(decoy.Accession));
+                Assert.That(target.Name, Is.EqualTo(decoy.Name));
+                Assert.That(target.Length, Is.EqualTo(decoy.Length));
+                Assert.That(target.OneBasedPossibleLocalizedModifications.Count, Is.EqualTo(decoy.OneBasedPossibleLocalizedModifications.Count));
+
+                Assert.That(decoy.BaseSequence, Is.EqualTo(expectedSequences[target.Name]));
+            }
+        }
+
+
+        // TODO: Implement these tests once the other decoy generation methods are available
+
+        [Test]
+        public void TestShuffledDecoy_Simple()
+        {
+            var oligos = new List<RNA>
+            {
+                new RNA("GUACUG"),
+                new RNA("GUACUA"),
+            };
+
+            // Shuffle decoys are not implemented yet; the generator currently throws.
+            Assert.Throws<NotImplementedException>(
+                () => RnaDecoyGenerator.GenerateDecoys(oligos, DecoyType.Shuffle));
+
+            // Assertions to enable once shuffled decoys exist:
+            //var decoys = RnaDecoyGenerator.GenerateDecoys(oligos, DecoyType.Shuffle);
+            //Assert.That(decoys.Count, Is.EqualTo(2));
+        }
+
+        [Test]
+        public void TestShuffledDecoy_SimpleWithMods()
+        {
+            var oligos = new List<RNA>
+            {
+                new RNA("GUACUG"),
+                new RNA("GUACUA"),
+            };
+
+            // NOTE(review): despite the test name, no modifications are attached to these sequences yet.
+            Assert.Throws<NotImplementedException>(
+                () => RnaDecoyGenerator.GenerateDecoys(oligos, DecoyType.Shuffle));
+            //var decoys = RnaDecoyGenerator.GenerateDecoys(oligos, DecoyType.Shuffle);
+            //Assert.That(decoys.Count, Is.EqualTo(2));
+        }
+
+        [Test]
+        public void TestShuffledDecoy_FromDatabase()
+        {
+            // Loading with Shuffle decoys is expected to throw until shuffling is implemented.
+            Assert.Throws<NotImplementedException>(
+                () => RnaDbLoader.LoadRnaFasta(ModomicsUnmodifiedFastaPath, true, DecoyType.Shuffle, false, out var errors));
+
+            // Assertions to enable once shuffled decoys exist:
+            //var oligos = RnaDbLoader.LoadRnaFasta(ModomicsUnmodifiedFastaPath, true, DecoyType.Shuffle, false, out var errors);
+            //Assert.That(errors.Count, Is.EqualTo(0));
+            //Assert.That(oligos.Count, Is.EqualTo(10));
+        }
+
+        [Test]
+        public void TestSlideDecoy_Simple()
+        {
+            var oligos = new List<RNA>
+            {
+                new RNA("GUACUG"),
+                new RNA("GUACUA"),
+            };
+
+            // Slide decoys are not implemented yet; the generator currently throws.
+            Assert.Throws<NotImplementedException>(
+                () => RnaDecoyGenerator.GenerateDecoys(oligos, DecoyType.Slide));
+
+            //var decoys = RnaDecoyGenerator.GenerateDecoys(oligos, DecoyType.Slide);
+            //Assert.That(decoys.Count, Is.EqualTo(2));
+        }
+
+        [Test]
+        public void TestSlideDecoy_SimpleWithMods()
+        {
+            var oligos = new List<RNA>
+            {
+                new RNA("GUACUG"),
+                new RNA("GUACUA"),
+            };
+
+            // NOTE(review): despite the test name, no modifications are attached to these sequences yet.
+            Assert.Throws<NotImplementedException>(
+                () => RnaDecoyGenerator.GenerateDecoys(oligos, DecoyType.Slide));
+
+            // Assertions to enable once slide decoys exist:
+            //var decoys = RnaDecoyGenerator.GenerateDecoys(oligos, DecoyType.Slide);
+            //Assert.That(decoys.Count, Is.EqualTo(2));
+        }
+
+        [Test]
+        public void TestSlideDecoy_FromDatabase()
+        {
+            // Request Slide decoys (the original asked for Shuffle, so the Slide path was never exercised here).
+            Assert.Throws<NotImplementedException>(
+                () => RnaDbLoader.LoadRnaFasta(ModomicsUnmodifiedFastaPath, true, DecoyType.Slide, false, out var errors));
+
+            // Assertions to enable once slide decoys exist:
+            //var oligos = RnaDbLoader.LoadRnaFasta(ModomicsUnmodifiedFastaPath, true, DecoyType.Slide, false, out var errors);
+            //Assert.That(errors.Count, Is.EqualTo(0));
+            //Assert.That(oligos.Count, Is.EqualTo(10));
+        }
+    }
+}
diff --git a/mzLib/Transcriptomics/ClassExtensions.cs b/mzLib/Transcriptomics/ClassExtensions.cs
index a61c5c837..2ac37a67d 100644
--- a/mzLib/Transcriptomics/ClassExtensions.cs
+++ b/mzLib/Transcriptomics/ClassExtensions.cs
@@ -77,5 +77,45 @@ public static T CreateNew<T>(this T target, string? sequence = null, IDictionary
 
             return (T)returnObj ?? throw new NullReferenceException("Error creating new INucleicAcid");
         }
+
+        /// <summary>
+        /// Transcribes a DNA sequence into an RNA sequence
+        /// </summary>
+        /// <remarks>
+        /// Coding strand input: every 'T' is written as 'U' and all other characters are kept.
+        /// Template strand input: 'A', 'T', 'C', 'G' are written as 'U', 'A', 'G', 'C' respectively,
+        /// position by position (the sequence is not reversed); other characters are kept.
+        /// Only the uppercase letters listed above are converted.
+        /// </remarks>
+        /// <param name="dna">The DNA sequence to transcribe</param>
+        /// <param name="isCodingStrand">True if the input sequence is the coding strand, False if the input sequence is the template strand</param>
+        /// <returns>The RNA sequence, one output character per input character</returns>
+        public static string Transcribe(this string dna, bool isCodingStrand = true)
+        {
+            var rna = new StringBuilder();
+            foreach (char nucleotide in dna)
+            {
+                char transcribed;
+                if (!isCodingStrand)
+                {
+                    // Template strand: emit the complementary RNA base.
+                    transcribed = nucleotide switch
+                    {
+                        'A' => 'U',
+                        'T' => 'A',
+                        'C' => 'G',
+                        'G' => 'C',
+                        _ => nucleotide,
+                    };
+                }
+                else
+                {
+                    transcribed = nucleotide == 'T' ? 'U' : nucleotide;
+                }
+
+                rna.Append(transcribed);
+            }
+            return rna.ToString();
+        }
     }
 }
diff --git a/mzLib/Transcriptomics/Digestion/NucleolyticOligo.cs b/mzLib/Transcriptomics/Digestion/NucleolyticOligo.cs
index 7f98597a4..a2ad1741b 100644
--- a/mzLib/Transcriptomics/Digestion/NucleolyticOligo.cs
+++ b/mzLib/Transcriptomics/Digestion/NucleolyticOligo.cs
@@ -4,6 +4,9 @@
 
 namespace Transcriptomics.Digestion
 {
+    /// <summary>
+    /// The most basic form of a digested oligo, this class does not care about mass or formula, just base sequence
+    /// </summary>
     public class NucleolyticOligo : DigestionProduct
     {
         protected IHasChemicalFormula _fivePrimeTerminus;
diff --git a/mzLib/UsefulProteomicsDatabases/FastaHeaderFieldRegex.cs b/mzLib/UsefulProteomicsDatabases/FastaHeaderFieldRegex.cs
index b70e3dc23..51978b2db 100644
--- a/mzLib/UsefulProteomicsDatabases/FastaHeaderFieldRegex.cs
+++ b/mzLib/UsefulProteomicsDatabases/FastaHeaderFieldRegex.cs
@@ -19,5 +19,17 @@ public FastaHeaderFieldRegex(string fieldName, string regularExpression, int mat
         public int Match { get; }
 
         public int Group { get; }
+
+        public string ApplyRegex(string input)
+        {
+            // Match and Group are used as zero-based indices into the regex results for this input.
+            var found = Regex.Matches(input);
+            if (found.Count <= Match)
+            {
+                return null!; // fewer matches than the requested index: nothing extracted
+            }
+            var captured = found[Match].Groups;
+            return captured.Count > Group ? captured[Group].Value : null!;
+        }
     }
 }
\ No newline at end of file
diff --git a/mzLib/UsefulProteomicsDatabases/ProteinDbWriter.cs b/mzLib/UsefulProteomicsDatabases/ProteinDbWriter.cs
index 155945558..d47912f4d 100644
--- a/mzLib/UsefulProteomicsDatabases/ProteinDbWriter.cs
+++ b/mzLib/UsefulProteomicsDatabases/ProteinDbWriter.cs
@@ -5,12 +5,283 @@
 using System.IO;
 using System.Linq;
 using System.Xml;
+using Easy.Common.Extensions;
+using Omics;
 using Omics.Modifications;
+using Transcriptomics;
 
 namespace UsefulProteomicsDatabases
 {
+
+    /// <summary>
+    /// Provides methods for writing protein and nucleic acid databases to XML and FASTA formats.
+    /// Did not rename to DbWriter to ensure compatibility with the original UsefulProteomicsDatabases namespace.
+    /// </summary>
     public class ProteinDbWriter
     {
+        /// <summary>
+        /// RNA overload: upcasts every entry to NucleicAcid and hands the work to WriteNucleicAcidXmlDatabase.
+        /// </summary>
+        /// <param name="additionalModsToAddToProteins">Additional modifications, keyed by accession, forwarded unchanged.</param>
+        /// <param name="bioPolymerList">The RNA sequences to be written to the database.</param>
+        /// <param name="outputFileName">The name of the output XML file.</param>
+        /// <returns>Whatever WriteNucleicAcidXmlDatabase returns for the same arguments.</returns>
+        public static Dictionary<string, int> WriteXmlDatabase(Dictionary<string, HashSet<Tuple<int, Modification>>> additionalModsToAddToProteins,
+            List<RNA> bioPolymerList, string outputFileName)
+            => WriteNucleicAcidXmlDatabase(additionalModsToAddToProteins, bioPolymerList.ToList<NucleicAcid>(), outputFileName);
+
+        /// <summary>
+        /// Writes an XML database for a list of nucleic acid sequences, including additional modifications.
+        /// </summary>
+        /// <remarks>
+        /// The root element is "mzLibProteinDb". It holds one "modification" element per distinct modification
+        /// (ordered by IdWithMotif) followed by one "entry" element per nucleic acid. Additional modification
+        /// definitions are only written for accessions that occur in <paramref name="nucleicAcidList"/>.
+        /// </remarks>
+        /// <param name="additionalModsToAddToProteins">Sets of Tuple(int, Modification) keyed by accession; null is treated as empty.</param>
+        /// <param name="nucleicAcidList">A list of nucleic acid sequences to be written to the database.</param>
+        /// <param name="outputFileName">The name of the output XML file.</param>
+        /// <returns>The dictionary that is passed to GetModsForThisBioPolymer as newModResEntries while the entries are written.</returns>
+        private static Dictionary<string, int> WriteNucleicAcidXmlDatabase(
+            Dictionary<string, HashSet<Tuple<int, Modification>>> additionalModsToAddToProteins,
+            List<NucleicAcid> nucleicAcidList, string outputFileName)
+        {
+            additionalModsToAddToProteins ??= new Dictionary<string, HashSet<Tuple<int, Modification>>>();
+            var xmlWriterSettings = new XmlWriterSettings
+            {
+                Indent = true,
+                IndentChars = "  "
+            };
+
+            Dictionary<string, int> newModResEntries = new Dictionary<string, int>();
+            using (XmlWriter writer = XmlWriter.Create(outputFileName, xmlWriterSettings))
+            {
+                writer.WriteStartDocument();
+                writer.WriteStartElement("mzLibProteinDb");
+
+                // Accessions of the entries being written, collected once so the filter below
+                // does not re-enumerate the whole list for every additional-modification key.
+                var writtenAccessions = new HashSet<string>(nucleicAcidList.Select(nu => nu.Accession));
+
+                // modifications already on the nucleic acids, plus the additional ones supplied for accessions in this list
+                var allRelevantModifications =
+                    new HashSet<Modification>(nucleicAcidList
+                        .SelectMany(p => p.OneBasedPossibleLocalizedModifications.SelectMany(m => m.Value))
+                        .Concat(additionalModsToAddToProteins
+                            .Where(kv => writtenAccessions.Contains(kv.Key))
+                            .SelectMany(kv => kv.Value.Select(v => v.Item2))));
+
+                foreach (Modification mod in allRelevantModifications.OrderBy(m => m.IdWithMotif))
+                {
+                    writer.WriteStartElement("modification");
+                    writer.WriteString(mod.ToString() + Environment.NewLine + "//");
+                    writer.WriteEndElement();
+                }
+
+                foreach (var nucleicAcid in nucleicAcidList)
+                {
+                    writer.WriteStartElement("entry");
+                    writer.WriteStartElement("accession");
+                    writer.WriteString(nucleicAcid.Accession);
+                    writer.WriteEndElement();
+
+                    if (nucleicAcid.Name.IsNotNullOrEmptyOrWhiteSpace())
+                    {
+                        writer.WriteStartElement("name");
+                        writer.WriteString(nucleicAcid.Name);
+                        writer.WriteEndElement();
+                    }
+
+                    if (nucleicAcid.FullName.IsNotNullOrEmptyOrWhiteSpace())
+                    {
+                        writer.WriteStartElement("protein");
+                        writer.WriteStartElement("recommendedName");
+                        writer.WriteStartElement("fullName");
+                        writer.WriteString(nucleicAcid.FullName);
+                        writer.WriteEndElement();
+                        writer.WriteEndElement();
+                        writer.WriteEndElement();
+                    }
+
+                    writer.WriteStartElement("gene");
+                    foreach (var geneName in nucleicAcid.GeneNames)
+                    {
+                        writer.WriteStartElement("name");
+                        writer.WriteAttributeString("type", geneName.Item1);
+                        writer.WriteString(geneName.Item2);
+                        writer.WriteEndElement();
+                    }
+                    writer.WriteEndElement();
+
+                    if (nucleicAcid.Organism.IsNotNullOrEmptyOrWhiteSpace())
+                    {
+                        writer.WriteStartElement("organism");
+                        writer.WriteStartElement("name");
+                        writer.WriteAttributeString("type", "scientific");
+                        writer.WriteString(nucleicAcid.Organism);
+                        writer.WriteEndElement();
+                        writer.WriteEndElement();
+                    }
+
+                    //foreach (var dbRef in nucleicAcid)
+                    //{
+                    //    writer.WriteStartElement("dbReference");
+                    //    writer.WriteAttributeString("type", dbRef.Type);
+                    //    writer.WriteAttributeString("id", dbRef.Id);
+                    //    foreach (Tuple<string, string> property in dbRef.Properties)
+                    //    {
+                    //        writer.WriteStartElement("property");
+                    //        writer.WriteAttributeString("type", property.Item1);
+                    //        writer.WriteAttributeString("value", property.Item2);
+                    //        writer.WriteEndElement();
+                    //    }
+                    //    writer.WriteEndElement();
+                    //}
+
+                    ////for now we are not going to write top-down truncations generated for top-down truncation search.
+                    ////some day we could write those if observed
+                    ////the truncation designation is contained in the "type" field of ProteolysisProduct
+                    //List<ProteolysisProduct> proteolysisProducts = nucleicAcid.ProteolysisProducts.Where(p => !p.Type.Contains("truncation")).ToList();
+                    //foreach (var proteolysisProduct in proteolysisProducts)
+                    //{
+                    //    writer.WriteStartElement("feature");
+                    //    writer.WriteAttributeString("type", proteolysisProduct.Type.Split('(')[0]);
+                    //    writer.WriteStartElement("location");
+                    //    writer.WriteStartElement("begin");
+                    //    writer.WriteAttributeString("position", proteolysisProduct.OneBasedBeginPosition.ToString());
+                    //    writer.WriteEndElement();
+                    //    writer.WriteStartElement("end");
+                    //    writer.WriteAttributeString("position", proteolysisProduct.OneBasedEndPosition.ToString());
+                    //    writer.WriteEndElement();
+                    //    writer.WriteEndElement();
+                    //    writer.WriteEndElement();
+                    //}
+
+                    foreach (var hm in GetModsForThisBioPolymer(nucleicAcid, null, additionalModsToAddToProteins, newModResEntries).OrderBy(b => b.Key))
+                    {
+                        foreach (var modId in hm.Value)
+                        {
+                            writer.WriteStartElement("feature");
+                            writer.WriteAttributeString("type", "modified residue");
+                            writer.WriteAttributeString("description", modId);
+                            writer.WriteStartElement("location");
+                            writer.WriteStartElement("position");
+                            writer.WriteAttributeString("position", hm.Key.ToString(CultureInfo.InvariantCulture));
+                            writer.WriteEndElement();
+                            writer.WriteEndElement();
+                            writer.WriteEndElement();
+                        }
+                    }
+
+                    //foreach (var hm in nucleicAcid.SequenceVariations)
+                    //{
+                    //    writer.WriteStartElement("feature");
+                    //    writer.WriteAttributeString("type", "sequence variant");
+                    //    writer.WriteAttributeString("description", hm.Description.ToString());
+                    //    writer.WriteStartElement("original");
+                    //    writer.WriteString(hm.OriginalSequence);
+                    //    writer.WriteEndElement(); // original
+                    //    writer.WriteStartElement("variation");
+                    //    writer.WriteString(hm.VariantSequence);
+                    //    writer.WriteEndElement(); // variation
+                    //    writer.WriteStartElement("location");
+                    //    if (hm.OneBasedBeginPosition == hm.OneBasedEndPosition)
+                    //    {
+                    //        writer.WriteStartElement("position");
+                    //        writer.WriteAttributeString("position", hm.OneBasedBeginPosition.ToString());
+                    //        writer.WriteEndElement();
+                    //    }
+                    //    else
+                    //    {
+                    //        writer.WriteStartElement("begin");
+                    //        writer.WriteAttributeString("position", hm.OneBasedBeginPosition.ToString());
+                    //        writer.WriteEndElement();
+                    //        writer.WriteStartElement("end");
+                    //        writer.WriteAttributeString("position", hm.OneBasedEndPosition.ToString());
+                    //        writer.WriteEndElement();
+                    //    }
+                    //    foreach (var hmm in GetModsForThisProtein(nucleicAcid, hm, additionalModsToAddToProteins, newModResEntries).OrderBy(b => b.Key))
+                    //    {
+                    //        foreach (var modId in hmm.Value)
+                    //        {
+                    //            writer.WriteStartElement("subfeature");
+                    //            writer.WriteAttributeString("type", "modified residue");
+                    //            writer.WriteAttributeString("description", modId);
+                    //            writer.WriteStartElement("location");
+                    //            writer.WriteStartElement("subposition");
+                    //            writer.WriteAttributeString("subposition", hmm.Key.ToString(CultureInfo.InvariantCulture));
+                    //            writer.WriteEndElement();
+                    //            writer.WriteEndElement();
+                    //            writer.WriteEndElement();
+                    //        }
+                    //    }
+                    //    writer.WriteEndElement(); // location
+                    //    writer.WriteEndElement(); // feature
+                    //}
+
+                    //foreach (var hm in nucleicAcid.DisulfideBonds)
+                    //{
+                    //    writer.WriteStartElement("feature");
+                    //    writer.WriteAttributeString("type", "disulfide bond");
+                    //    writer.WriteAttributeString("description", hm.Description);
+                    //    writer.WriteStartElement("location");
+                    //    if (hm.OneBasedBeginPosition == hm.OneBasedEndPosition)
+                    //    {
+                    //        writer.WriteStartElement("position");
+                    //        writer.WriteAttributeString("position", hm.OneBasedBeginPosition.ToString());
+                    //        writer.WriteEndElement();
+                    //    }
+                    //    else
+                    //    {
+                    //        writer.WriteStartElement("begin");
+                    //        writer.WriteAttributeString("position", hm.OneBasedBeginPosition.ToString());
+                    //        writer.WriteEndElement();
+                    //        writer.WriteStartElement("end");
+                    //        writer.WriteAttributeString("position", hm.OneBasedEndPosition.ToString());
+                    //        writer.WriteEndElement();
+                    //    }
+                    //    writer.WriteEndElement(); // location
+                    //    writer.WriteEndElement(); // feature
+                    //}
+
+                    //foreach (var hm in nucleicAcid.SpliceSites)
+                    //{
+                    //    writer.WriteStartElement("feature");
+                    //    writer.WriteAttributeString("type", "splice site");
+                    //    writer.WriteAttributeString("description", hm.Description);
+                    //    writer.WriteStartElement("location");
+                    //    if (hm.OneBasedBeginPosition == hm.OneBasedEndPosition)
+                    //    {
+                    //        writer.WriteStartElement("position");
+                    //        writer.WriteAttributeString("position", hm.OneBasedBeginPosition.ToString());
+                    //        writer.WriteEndElement();
+                    //    }
+                    //    else
+                    //    {
+                    //        writer.WriteStartElement("begin");
+                    //        writer.WriteAttributeString("position", hm.OneBasedBeginPosition.ToString());
+                    //        writer.WriteEndElement();
+                    //        writer.WriteStartElement("end");
+                    //        writer.WriteAttributeString("position", hm.OneBasedEndPosition.ToString());
+                    //        writer.WriteEndElement();
+                    //    }
+                    //    writer.WriteEndElement(); // location
+                    //    writer.WriteEndElement(); // feature
+                    //}
+
+                    writer.WriteStartElement("sequence");
+                    writer.WriteAttributeString("length", nucleicAcid.Length.ToString(CultureInfo.InvariantCulture));
+                    writer.WriteString(nucleicAcid.BaseSequence);
+                    writer.WriteEndElement(); // sequence
+                    writer.WriteEndElement(); // entry
+                }
+
+                writer.WriteEndElement(); // mzLibProteinDb
+                writer.WriteEndDocument();
+            }
+            return newModResEntries;
+        }
+
         /// <summary>
         /// Writes a protein database in mzLibProteinDb format, with additional modifications from the AdditionalModsToAddToProteins list.
         /// </summary>
@@ -138,7 +409,7 @@ public static Dictionary<string, int> WriteXmlDatabase(Dictionary<string, HashSe
                         writer.WriteEndElement();
                     }
 
-                    foreach (var hm in GetModsForThisProtein(protein, null, additionalModsToAddToProteins, newModResEntries).OrderBy(b => b.Key))
+                    foreach (var hm in GetModsForThisBioPolymer(protein, null, additionalModsToAddToProteins, newModResEntries).OrderBy(b => b.Key))
                     {
                         foreach (var modId in hm.Value)
                         {
@@ -181,7 +452,7 @@ public static Dictionary<string, int> WriteXmlDatabase(Dictionary<string, HashSe
                             writer.WriteAttributeString("position", hm.OneBasedEndPosition.ToString());
                             writer.WriteEndElement();
                         }
-                        foreach (var hmm in GetModsForThisProtein(protein, hm, additionalModsToAddToProteins, newModResEntries).OrderBy(b => b.Key))
+                        foreach (var hmm in GetModsForThisBioPolymer(protein, hm, additionalModsToAddToProteins, newModResEntries).OrderBy(b => b.Key))
                         {
                             foreach (var modId in hmm.Value)
                             {
@@ -276,7 +547,7 @@ public static void WriteFastaDatabase(List<Protein> proteinList, string outputFi
             }
         }
 
-        private static Dictionary<int, HashSet<string>> GetModsForThisProtein(Protein protein, SequenceVariation seqvar, Dictionary<string, HashSet<Tuple<int, Modification>>> additionalModsToAddToProteins, Dictionary<string, int> newModResEntries)
+        private static Dictionary<int, HashSet<string>> GetModsForThisBioPolymer(IBioPolymer protein, SequenceVariation seqvar, Dictionary<string, HashSet<Tuple<int, Modification>>> additionalModsToAddToProteins, Dictionary<string, int> newModResEntries)
         {
             var modsToWriteForThisSpecificProtein = new Dictionary<int, HashSet<string>>();
 
@@ -292,7 +563,8 @@ private static Dictionary<int, HashSet<string>> GetModsForThisProtein(Protein pr
                 }
             }
 
-            string accession = seqvar == null ? protein.Accession : VariantApplication.GetAccession(protein, new[] { seqvar });
+            // This cast to protein is okay as no sequence variation is programmed to RNA as of 9/24/24
+            string accession = seqvar == null ? protein.Accession : VariantApplication.GetAccession(protein as Protein, new[] { seqvar }); 
             if (additionalModsToAddToProteins.ContainsKey(accession))
             {
                 foreach (var ye in additionalModsToAddToProteins[accession])
diff --git a/mzLib/UsefulProteomicsDatabases/Transcriptomics/RnaDbLoader.cs b/mzLib/UsefulProteomicsDatabases/Transcriptomics/RnaDbLoader.cs
index 1828f7b4c..13aa82fb2 100644
--- a/mzLib/UsefulProteomicsDatabases/Transcriptomics/RnaDbLoader.cs
+++ b/mzLib/UsefulProteomicsDatabases/Transcriptomics/RnaDbLoader.cs
@@ -22,6 +22,19 @@ public enum RnaFastaHeaderType
     public static class RnaDbLoader
     {
 
+        #region Header Detection and Property Regexes
+
+        public static RnaFastaHeaderType DetectRnaFastaHeaderType(string line)
+        {
+            // Ordinal comparison: ">id" is a fixed token, so the culture-sensitive matching that
+            // StartsWith(string) performs by default is not wanted. Any other prefix is Unknown.
+            bool isModomics = line.StartsWith(">id", System.StringComparison.Ordinal);
+            return isModomics ? RnaFastaHeaderType.Modomics : RnaFastaHeaderType.Unknown;
+        }
+
+        /// <summary>
+        /// Regexes that extract the accession number, species, name, and additional data fields from a Modomics FASTA header
+        /// </summary>
         public static readonly Dictionary<string, FastaHeaderFieldRegex> ModomicsFieldRegexes =
             new Dictionary<string, FastaHeaderFieldRegex>()
             {
@@ -35,6 +48,20 @@ public static class RnaDbLoader
                 { "Cellular Localization", new FastaHeaderFieldRegex("CellularLocalization", @"Cellular_Localization:(?<Cellular_Localization>.+?)\|", 0, 1) },
             };
 
+        #endregion
+
+        /// <summary>
+        /// Loads an RNA file from the specified location, optionally generating decoys and adding error tracking
+        /// </summary>
+        /// <param name="rnaDbLocation">The file path to the RNA FASTA database</param>
+        /// <param name="generateTargets">True to return the targets followed by the decoys; false to return only the decoys</param>
+        /// <param name="decoyType">The type of decoy generation to apply</param>
+        /// <param name="isContaminant">Indicates if the RNA sequence is a contaminant</param>
+        /// <param name="errors">Outputs any errors encountered during the process</param>
+        /// <param name="fivePrimeTerm">An optional 5' prime chemical modification term</param>
+        /// <param name="threePrimeTerm">An optional 3' prime chemical modification term</param>
+        /// <returns>A list of RNA sequences loaded from the FASTA database</returns>
+        /// <exception cref="MzLibUtil.MzLibException">Thrown if the FASTA header format is unknown or other issues occur during loading.</exception>
 
         public static List<RNA> LoadRnaFasta(string rnaDbLocation, bool generateTargets, DecoyType decoyType,
             bool isContaminant, out List<string> errors, IHasChemicalFormula? fivePrimeTerm = null, IHasChemicalFormula? threePrimeTerm = null)
@@ -78,7 +105,7 @@ public static List<RNA> LoadRnaFasta(string rnaDbLocation, bool generateTargets,
                     {
                         if (headerType is null)
                         {
-                            headerType = DetectFastaHeaderType(line);
+                            headerType = DetectRnaFastaHeaderType(line);
 
                             switch (headerType)
                             {
@@ -86,9 +113,6 @@ public static List<RNA> LoadRnaFasta(string rnaDbLocation, bool generateTargets,
                                     regexes = ModomicsFieldRegexes;
                                     identifierHeader = "SOterm";
                                     break;
-
-                                case RnaFastaHeaderType.Unknown:
-                                case null:
                                 default:
                                     throw new MzLibUtil.MzLibException("Unknown fasta header format: " + line);
                             }
@@ -105,9 +129,9 @@ public static List<RNA> LoadRnaFasta(string rnaDbLocation, bool generateTargets,
 
                         sb = new StringBuilder();
                     }
-                    else if (sb is not null)
+                    else
                     {
-                        sb.Append(line.Trim());
+                        sb?.Append(line.Trim());
                     }
 
                     if ((fasta.Peek() == '>' || fasta.Peek() == -1) /*&& accession != null*/ && sb != null)
@@ -150,14 +174,7 @@ public static List<RNA> LoadRnaFasta(string rnaDbLocation, bool generateTargets,
             return generateTargets ? targets.Concat(decoys).ToList() : decoys;
         }
 
-        private static RnaFastaHeaderType DetectFastaHeaderType(string line)
-        {
-            if (!line.StartsWith(">"))
-                return RnaFastaHeaderType.Unknown;
-            
-
-            return RnaFastaHeaderType.Modomics;
-        }
+        
 
         private static Dictionary<string, string> ParseRegexFields(string line,
             Dictionary<string, FastaHeaderFieldRegex> regexes)
@@ -166,14 +183,13 @@ private static Dictionary<string, string> ParseRegexFields(string line,
 
             foreach (var regex in regexes)
             {
-                string match = ProteinDbLoader.ApplyRegex(regex.Value, line);
+                string match = regex.Value.ApplyRegex(line);
                 fields.Add(regex.Key, match);
             }
 
             return fields;
         }
 
-
         public static Dictionary<string, IList<Modification>> IdToPossibleMods = new Dictionary<string, IList<Modification>>();
         public static Dictionary<string, Modification> IdWithMotifToMod = new Dictionary<string, Modification>();
 
diff --git a/mzLib/mzLib.sln.DotSettings b/mzLib/mzLib.sln.DotSettings
index 06594535d..6522afcd5 100644
--- a/mzLib/mzLib.sln.DotSettings
+++ b/mzLib/mzLib.sln.DotSettings
@@ -4,9 +4,11 @@
 	<s:Boolean x:Key="/Default/UserDictionary/Words/=Deconvolute/@EntryIndexedValue">True</s:Boolean>
 	<s:Boolean x:Key="/Default/UserDictionary/Words/=deconvoluted/@EntryIndexedValue">True</s:Boolean>
 	<s:Boolean x:Key="/Default/UserDictionary/Words/=Deconvoluter/@EntryIndexedValue">True</s:Boolean>
+	<s:Boolean x:Key="/Default/UserDictionary/Words/=Modomics/@EntryIndexedValue">True</s:Boolean>
 	<s:Boolean x:Key="/Default/UserDictionary/Words/=Nucleolytic/@EntryIndexedValue">True</s:Boolean>
 	<s:Boolean x:Key="/Default/UserDictionary/Words/=Oligo/@EntryIndexedValue">True</s:Boolean>
 	<s:Boolean x:Key="/Default/UserDictionary/Words/=Prsm/@EntryIndexedValue">True</s:Boolean>
+	<s:Boolean x:Key="/Default/UserDictionary/Words/=Regexes/@EntryIndexedValue">True</s:Boolean>
 	<s:Boolean x:Key="/Default/UserDictionary/Words/=Toppic/@EntryIndexedValue">True</s:Boolean>
 	<s:Boolean x:Key="/Default/UserDictionary/Words/=Monoisotopic/@EntryIndexedValue">True</s:Boolean>
 	<s:Boolean x:Key="/Default/UserDictionary/Words/=Transcriptomics/@EntryIndexedValue">True</s:Boolean>

From d75f75207b45339b0260d8b2c56cdf64fbe98db5 Mon Sep 17 00:00:00 2001
From: Nic Bollis <nbollis@wisc.edu>
Date: Tue, 24 Sep 2024 17:49:35 -0500
Subject: [PATCH 10/17] Refactor and enhance RNA and oligo handling in tests

- Added `using` directives for `Transcriptomics.Digestion` and `UsefulProteomicsDatabases.Transcriptomics` in `TestDecoyGenerator.cs`.
- Introduced `TestCreateNew` in `TestDecoyGenerator.cs` to verify RNA and oligo creation.
- Added `using` directive for `MzLibUtil` in `TestDigestion.cs`.
- Added a test in `TestDigestion.cs` for exception handling with invalid sequences.
- Added `using` directives for `Omics` and related namespaces in `TestFragmentation.cs`.
- Modified `TestFragmentation_Modified` in `TestFragmentation.cs` to use `OligoWithSetMods` directly and added assertions.
- Updated `ClassExtensions.cs` to allow setting `isDecoy` in new `RNA` objects.
- Refactored `OligoWithSetMods.cs` to return a dictionary from `GetModsAfterDeserialization`.
- Updated `OligoWithSetMods.cs` to initialize `_allModsOneIsNterminus` using the returned dictionary.
---
 .../Transcriptomics/TestDecoyGenerator.cs     | 38 +++++++++++++++++++
 mzLib/Test/Transcriptomics/TestDigestion.cs   |  6 +++
 .../Test/Transcriptomics/TestFragmentation.cs | 16 ++++++--
 mzLib/Transcriptomics/ClassExtensions.cs      |  3 +-
 .../Digestion/OligoWithSetMods.cs             | 11 ++++--
 5 files changed, 65 insertions(+), 9 deletions(-)

diff --git a/mzLib/Test/Transcriptomics/TestDecoyGenerator.cs b/mzLib/Test/Transcriptomics/TestDecoyGenerator.cs
index ea5b2c22d..800126b1a 100644
--- a/mzLib/Test/Transcriptomics/TestDecoyGenerator.cs
+++ b/mzLib/Test/Transcriptomics/TestDecoyGenerator.cs
@@ -8,6 +8,7 @@
 using System.Threading.Tasks;
 using NUnit.Framework.Interfaces;
 using Transcriptomics;
+using Transcriptomics.Digestion;
 using UsefulProteomicsDatabases.Transcriptomics;
 using UsefulProteomicsDatabases;
 
@@ -223,5 +224,42 @@ public void TestSlideDecoy_FromDatabase()
             //Assert.That(errors.Count, Is.EqualTo(0));
             //Assert.That(oligos.Count, Is.EqualTo(10));
         }
+
+
+        [Test]
+        public void TestCreateNew()
+        {
+            // A single "Sodium on A" modification, parsed from its text definition.
+            string sodiumDefinition = "ID   Sodium\r\nMT   Metal\r\nPP   Anywhere.\r\nTG   A\r\nCF   Na1H-1\r\n" + @"//";
+            var mods = PtmListLoader
+                .ReadModsFromString(sodiumDefinition, out List<(Modification, string)> _).ToList();
+            var sodiumOnA = mods.ToDictionary(p => p.IdWithMotif, p => p)["Sodium on A"];
+            var oneBasedPossibleLocalizedModifications = new Dictionary<int, List<Modification>>
+            {
+                [1] = new List<Modification> { sodiumOnA },
+                [3] = new List<Modification> { sodiumOnA },
+            };
+
+            var rna = new RNA("GAACUG", "name", "accession", "organism", "databaseFilePath",
+                null, null, oneBasedPossibleLocalizedModifications, false, false,
+                new Dictionary<string, string>());
+            var digestionParams = new RnaDigestionParams(maxMods: 1);
+            var firstOligo = rna.Digest(digestionParams, new List<Modification>(), mods)
+                .ToList().First();
+
+            // Copy both objects, passing null for the first two arguments and true for the third.
+            var clonedRna = rna.CreateNew(null, null, true);
+            var clonedOligo = firstOligo.CreateNew(null, null, true);
+
+            // The RNA copy keeps its sequence and possible modifications; only IsDecoy differs.
+            Assert.That(rna.BaseSequence, Is.EqualTo(clonedRna.BaseSequence));
+            Assert.That(rna.OneBasedPossibleLocalizedModifications, Is.EqualTo(clonedRna.OneBasedPossibleLocalizedModifications));
+            Assert.That(rna.IsDecoy, Is.Not.EqualTo(clonedRna.IsDecoy));
+
+            // The same holds for the oligo copy, whose decoy flag is read from its Parent.
+            Assert.That(firstOligo.BaseSequence, Is.EqualTo(clonedOligo.BaseSequence));
+            Assert.That(firstOligo.OneBasedPossibleLocalizedModifications, Is.EqualTo(clonedOligo.OneBasedPossibleLocalizedModifications));
+            Assert.That(firstOligo.Parent.IsDecoy, Is.Not.EqualTo(clonedOligo.Parent.IsDecoy));
+        }
     }
 }
diff --git a/mzLib/Test/Transcriptomics/TestDigestion.cs b/mzLib/Test/Transcriptomics/TestDigestion.cs
index 0a9c526eb..37abc0447 100644
--- a/mzLib/Test/Transcriptomics/TestDigestion.cs
+++ b/mzLib/Test/Transcriptomics/TestDigestion.cs
@@ -5,6 +5,7 @@
 using System.Linq;
 using Chemistry;
 using MassSpectrometry;
+using MzLibUtil;
 using NUnit.Framework;
 using Omics;
 using Omics.Digestion;
@@ -533,6 +534,11 @@ public static void OligoWithSetMods_CalculatedValues()
             Assert.That(oligoWithSetMods.MonoisotopicMass, Is.EqualTo(oldMonoMass + deltaMass).Within(0.01));
             Assert.That(oligoWithSetMods.MostAbundantMonoisotopicMass, Is.EqualTo(oldMostAbundantMass + deltaMass).Within(0.01));
             Assert.That(oligoWithSetMods.ThisChemicalFormula, Is.EqualTo(formula + formulaToAdd + formulaToAdd));
+
+            Assert.Throws<MzLibException>(() =>
+            {
+                var oligo = new OligoWithSetMods("GUA|GAUGUC", new Dictionary<string, Modification>());
+            });
         }
 
         #endregion
diff --git a/mzLib/Test/Transcriptomics/TestFragmentation.cs b/mzLib/Test/Transcriptomics/TestFragmentation.cs
index fea764246..76ddb8c3b 100644
--- a/mzLib/Test/Transcriptomics/TestFragmentation.cs
+++ b/mzLib/Test/Transcriptomics/TestFragmentation.cs
@@ -5,6 +5,7 @@
 using System.Linq;
 using Transcriptomics;
 using MassSpectrometry;
+using Omics;
 using Omics.Fragmentation;
 using Omics.Fragmentation.Oligo;
 using Omics.Modifications;
@@ -130,18 +131,25 @@ public void TestFragmentation_Modified(string sequence, string modString, string
             ProductType productType, double[] unmodifiedFragmentMass, double[] modifiedFragmentMasses)
         {
             var mods = PtmListLoader.ReadModsFromString(modString, out List<(Modification, string)> modsOut).ToList();
+            var modDict = mods.ToDictionary(p => p.IdWithMotif, p => p);
             var rna = new RNA(sequence);
 
-            var unmodifiedOligo = rna.Digest(new RnaDigestionParams(), new List<Modification>(), new List<Modification>())
-                .First() as OligoWithSetMods ?? throw new NullReferenceException();
+            var unmodifiedOligo = new OligoWithSetMods(sequence, new Dictionary<string, Modification>(),
+                 0, new RnaDigestionParams(), rna, 1, rna.Length);
             Assert.That(unmodifiedOligo.AllModsOneIsNterminus.Count, Is.EqualTo(0));
             Assert.That(unmodifiedOligo.FullSequence, Is.EqualTo(sequence));
+            Assert.That(unmodifiedOligo.SequenceWithChemicalFormulas, Is.EqualTo(sequence));
+            Assert.That(unmodifiedOligo.FullSequenceWithMassShift(), Is.EqualTo(sequence));
             Assert.That(unmodifiedOligo.MonoisotopicMass, Is.EqualTo(unmodifiedMass).Within(0.01));
 
-            var modifiedOligo = rna.Digest(new RnaDigestionParams(), mods, new List<Modification>())
-                .First() as OligoWithSetMods ?? throw new NullReferenceException();
+            var modifiedOligo = new OligoWithSetMods(fullSequence, modDict,
+                0, new RnaDigestionParams(), rna, 1, rna.Length);
+            var formulaSequence = fullSequence.Replace("Metal:Sodium on A", "H-1Na");
+            var massShiftSequence = fullSequence.Replace("Metal:Sodium on A", "+21.981944");
             Assert.That(modifiedOligo.AllModsOneIsNterminus.Count, Is.EqualTo(mods.Count));
             Assert.That(modifiedOligo.FullSequence, Is.EqualTo(fullSequence));
+            Assert.That(modifiedOligo.SequenceWithChemicalFormulas, Is.EqualTo(formulaSequence));
+            Assert.That(modifiedOligo.FullSequenceWithMassShift(), Is.EqualTo(massShiftSequence));
             Assert.That(modifiedOligo.MonoisotopicMass, Is.EqualTo(modifiedMass).Within(0.01));
 
             var unmodifiedProducts = unmodifiedOligo.GetNeutralFragments(productType).ToList();
diff --git a/mzLib/Transcriptomics/ClassExtensions.cs b/mzLib/Transcriptomics/ClassExtensions.cs
index 2ac37a67d..a5617863e 100644
--- a/mzLib/Transcriptomics/ClassExtensions.cs
+++ b/mzLib/Transcriptomics/ClassExtensions.cs
@@ -45,6 +45,7 @@ public static T CreateNew<T>(this T target, string? sequence = null, IDictionary
                 case OligoWithSetMods oligo:
                 {
                     var oldParent = oligo.Parent as RNA ?? throw new NullReferenceException();
+                    bool newIsDecoy = isDecoy ?? oldParent.IsDecoy;
                     var newParent = new RNA(
                         newSequence,
                         oldParent.Name,
@@ -55,7 +56,7 @@ public static T CreateNew<T>(this T target, string? sequence = null, IDictionary
                         oldParent.ThreePrimeTerminus,
                         newModifications,
                         oldParent.IsContaminant,
-                        oldParent.IsDecoy,
+                        newIsDecoy,
                         oldParent.AdditionalDatabaseFields);
 
                     returnObj = new OligoWithSetMods(
diff --git a/mzLib/Transcriptomics/Digestion/OligoWithSetMods.cs b/mzLib/Transcriptomics/Digestion/OligoWithSetMods.cs
index 2ef0ae3f2..f432b5fed 100644
--- a/mzLib/Transcriptomics/Digestion/OligoWithSetMods.cs
+++ b/mzLib/Transcriptomics/Digestion/OligoWithSetMods.cs
@@ -49,9 +49,10 @@ public OligoWithSetMods(string sequence, Dictionary<string, Modification> allKno
 
             FullSequence = sequence;
             _baseSequence = IBioPolymerWithSetMods.GetBaseSequenceFromFullSequence(sequence);
-            GetModsAfterDeserialization(allKnownMods);
+            _allModsOneIsNterminus = GetModsAfterDeserialization(allKnownMods);
             NumFixedMods = numFixedMods;
             _digestionParams = digestionParams;
+            Description = description;
 
             if (n != null)
                 Parent = n;
@@ -303,9 +304,9 @@ public IBioPolymerWithSetMods Localize(int j, double massToLocalize)
             return peptideWithLocalizedMass;
         }
 
-        private void GetModsAfterDeserialization(Dictionary<string, Modification> idToMod)
+        private Dictionary<int, Modification> GetModsAfterDeserialization(Dictionary<string, Modification> idToMod)
         {
-            _allModsOneIsNterminus = new Dictionary<int, Modification>();
+            var mods = new Dictionary<int, Modification>();
             int currentModStart = 0;
             int currentModificationLocation = 1;
             bool currentlyReadingMod = false;
@@ -355,7 +356,7 @@ private void GetModsAfterDeserialization(Dictionary<string, Modification> idToMo
                             currentModificationLocation = BaseSequence.Length + 2;
                         }
 
-                        _allModsOneIsNterminus.Add(currentModificationLocation, mod);
+                        mods.Add(currentModificationLocation, mod);
                         currentlyReadingMod = false;
                     }
                 }
@@ -365,6 +366,8 @@ private void GetModsAfterDeserialization(Dictionary<string, Modification> idToMo
                 }
                 //else do nothing
             }
+
+            return mods;
         }
     }
 }

From 5ec870db530937569d1beecbec0d78b6f03936be Mon Sep 17 00:00:00 2001
From: nbollis <nbollis@comcast.net>
Date: Wed, 25 Sep 2024 12:19:36 -0500
Subject: [PATCH 11/17] Broke out TerminusSpecificProductTypes class and
 removed unnecessary namespaces

---
 .../Oligo/DissociationTypeCollection.cs       | 109 +-------------
 .../Oligo/TerminusSpecificProductTypes.cs     | 141 ++++++++++++++++++
 mzLib/Test/Transcriptomics/TestProductType.cs |   2 +-
 mzLib/Transcriptomics/ClassExtensions.cs      |   4 -
 .../Interfaces/INucleicAcid.cs                |   1 -
 mzLib/Transcriptomics/NucleicAcid.cs          |   4 -
 mzLib/Transcriptomics/RNA.cs                  |   5 -
 7 files changed, 148 insertions(+), 118 deletions(-)
 create mode 100644 mzLib/Omics/Fragmentation/Oligo/TerminusSpecificProductTypes.cs

diff --git a/mzLib/Omics/Fragmentation/Oligo/DissociationTypeCollection.cs b/mzLib/Omics/Fragmentation/Oligo/DissociationTypeCollection.cs
index 7b5a411ee..4302fadcb 100644
--- a/mzLib/Omics/Fragmentation/Oligo/DissociationTypeCollection.cs
+++ b/mzLib/Omics/Fragmentation/Oligo/DissociationTypeCollection.cs
@@ -100,6 +100,12 @@ public static class DissociationTypeCollection
                                                                                                                                                                                                                      public static List<ProductType> GetRnaProductTypesFromDissociationType(this DissociationType dissociationType) =>
             ProductsFromDissociationType[dissociationType];
 
+        /// <summary>
+        /// Returns the monoisotopic mass of the fragment ion cap registered for a product type
+        /// </summary>
+        /// <param name="type">Product type used as the key into FragmentIonCaps</param>
+        /// <returns>MonoisotopicMass of the FragmentIonCaps entry for <paramref name="type"/></returns>
+        public static double GetRnaMassShiftFromProductType(this ProductType type) => FragmentIonCaps[type].MonoisotopicMass;
 
         /// <summary>
         /// Mass to be added or subtracted
@@ -138,109 +144,6 @@ public static List<ProductType> GetRnaProductTypesFromDissociationType(this Diss
                 { ProductType.M, new ChemicalFormula() }
             };
 
-        /// <summary>
-        /// Returns mass shift by product type
-        /// </summary>
-        /// <param name="type"></param>
-        /// <returns></returns>
-        public static double GetRnaMassShiftFromProductType(this ProductType type) => FragmentIonCaps[type].MonoisotopicMass;
-
-        public static FragmentationTerminus GetRnaTerminusType(this ProductType fragmentType)
-        {
-            switch (fragmentType)
-            {
-                case ProductType.a:
-                case ProductType.aWaterLoss:
-                case ProductType.aBaseLoss:
-                case ProductType.b:
-                case ProductType.bWaterLoss:
-                case ProductType.bBaseLoss:
-                case ProductType.c:
-                case ProductType.cWaterLoss:
-                case ProductType.cBaseLoss:
-                case ProductType.d:
-                case ProductType.dWaterLoss:
-                case ProductType.dBaseLoss:
-                    return FragmentationTerminus.FivePrime;
-
-                case ProductType.w:
-                case ProductType.wWaterLoss:
-                case ProductType.wBaseLoss:
-                case ProductType.x:
-                case ProductType.xWaterLoss:
-                case ProductType.xBaseLoss:
-                case ProductType.y:
-                case ProductType.yWaterLoss:
-                case ProductType.yBaseLoss:
-                case ProductType.z:
-                case ProductType.zWaterLoss:
-                case ProductType.zBaseLoss:
-                    return FragmentationTerminus.ThreePrime;
-
-                case ProductType.M:
-                    return FragmentationTerminus.None;
-
-                case ProductType.aStar:
-                case ProductType.aDegree:
-                case ProductType.bAmmoniaLoss:
-                case ProductType.yAmmoniaLoss:
-                case ProductType.zPlusOne:
-                case ProductType.D:
-                case ProductType.Ycore:
-                case ProductType.Y:
-                default:
-                    throw new ArgumentOutOfRangeException(nameof(fragmentType), fragmentType, null);
-            }
-        }
-
-        /// <summary>
-        /// Product ion types by Fragmentation Terminus
-        /// </summary>
-        private static readonly Dictionary<FragmentationTerminus, List<ProductType>>
-            ProductIonTypesFromSpecifiedTerminus = new Dictionary<FragmentationTerminus, List<ProductType>>
-            {
-                {
-                    FragmentationTerminus.FivePrime, new List<ProductType>
-                    {
-                        ProductType.a, ProductType.aWaterLoss, ProductType.aBaseLoss,
-                        ProductType.b, ProductType.bWaterLoss, ProductType.bBaseLoss,
-                        ProductType.c, ProductType.cWaterLoss, ProductType.cBaseLoss,
-                        ProductType.d, ProductType.dWaterLoss, ProductType.dBaseLoss, 
-                    }
-                },
-                {
-                    FragmentationTerminus.ThreePrime, new List<ProductType>
-                    {
-                        ProductType.w, ProductType.wWaterLoss, ProductType.wBaseLoss,
-                        ProductType.x, ProductType.xWaterLoss, ProductType.xBaseLoss,
-                        ProductType.y, ProductType.yWaterLoss, ProductType.yBaseLoss,
-                        ProductType.z, ProductType.zWaterLoss, ProductType.zBaseLoss,
-                    }
-                },
-                {
-                    FragmentationTerminus.Both, new List<ProductType>
-                    {
-
-                        ProductType.a, ProductType.aWaterLoss, ProductType.aBaseLoss,
-                        ProductType.b, ProductType.bWaterLoss, ProductType.bBaseLoss,
-                        ProductType.c, ProductType.cWaterLoss, ProductType.cBaseLoss,
-                        ProductType.d, ProductType.dWaterLoss, ProductType.dBaseLoss, 
-                        ProductType.w, ProductType.wWaterLoss, ProductType.wBaseLoss,
-                        ProductType.x, ProductType.xWaterLoss, ProductType.xBaseLoss,
-                        ProductType.y, ProductType.yWaterLoss, ProductType.yBaseLoss,
-                        ProductType.z, ProductType.zWaterLoss, ProductType.zBaseLoss,
-                        ProductType.M
-                    }
-                }
-            };
-
-
-        public static List<ProductType> GetRnaTerminusSpecificProductTypes(
-            this FragmentationTerminus fragmentationTerminus)
-        {
-            return ProductIonTypesFromSpecifiedTerminus[fragmentationTerminus];
-        }
-
         /// <summary>
         /// Returns all product ion types based upon specified terminus
         /// </summary>
diff --git a/mzLib/Omics/Fragmentation/Oligo/TerminusSpecificProductTypes.cs b/mzLib/Omics/Fragmentation/Oligo/TerminusSpecificProductTypes.cs
new file mode 100644
index 000000000..0ec5541cd
--- /dev/null
+++ b/mzLib/Omics/Fragmentation/Oligo/TerminusSpecificProductTypes.cs
@@ -0,0 +1,141 @@
+﻿using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace Omics.Fragmentation.Oligo
+{
+    /// <summary>
+    /// Lookup tables, and the extension methods that read them, relating oligo product ion types
+    /// to the fragmentation terminus they are generated from
+    /// </summary>
+    public static class TerminusSpecificProductTypes
+    {
+        /// <summary>
+        /// Gets the product ion types registered for the specified terminus
+        /// </summary>
+        /// <param name="fragmentationTerminus">Key into <see cref="ProductIonTypesFromSpecifiedTerminus"/></param>
+        /// <returns>The list instance stored in the table, not a copy</returns>
+        /// <exception cref="KeyNotFoundException">The terminus has no entry in the table</exception>
+        /// <remarks>
+        /// FivePrime, ThreePrime, Both and None all have entries; any other terminus value throws
+        /// </remarks>
+        public static List<ProductType> GetRnaTerminusSpecificProductTypes(
+            this FragmentationTerminus fragmentationTerminus)
+        {
+            return ProductIonTypesFromSpecifiedTerminus[fragmentationTerminus];
+        }
+
+        /// <summary>
+        /// The types of ions that can be generated from an oligo fragment, based on the terminus of the fragment
+        /// </summary>
+        /// <remarks>
+        /// Declared readonly so the table cannot be replaced by another dictionary instance;
+        /// the entries themselves are still ordinary mutable collections
+        /// </remarks>
+        public static readonly Dictionary<FragmentationTerminus, List<ProductType>> ProductIonTypesFromSpecifiedTerminus = new Dictionary<FragmentationTerminus, List<ProductType>>
+        {
+            {
+                FragmentationTerminus.FivePrime, new List<ProductType>
+                {
+                    ProductType.a, ProductType.aWaterLoss, ProductType.aBaseLoss,
+                    ProductType.b, ProductType.bWaterLoss, ProductType.bBaseLoss,
+                    ProductType.c, ProductType.cWaterLoss, ProductType.cBaseLoss,
+                    ProductType.d, ProductType.dWaterLoss, ProductType.dBaseLoss,
+                }
+            },
+            {
+                FragmentationTerminus.ThreePrime, new List<ProductType>
+                {
+                    ProductType.w, ProductType.wWaterLoss, ProductType.wBaseLoss,
+                    ProductType.x, ProductType.xWaterLoss, ProductType.xBaseLoss,
+                    ProductType.y, ProductType.yWaterLoss, ProductType.yBaseLoss,
+                    ProductType.z, ProductType.zWaterLoss, ProductType.zBaseLoss,
+                }
+            },
+            {
+                FragmentationTerminus.Both, new List<ProductType>
+                {
+                    // every type from the FivePrime list
+                    ProductType.a, ProductType.aWaterLoss, ProductType.aBaseLoss,
+                    ProductType.b, ProductType.bWaterLoss, ProductType.bBaseLoss,
+                    ProductType.c, ProductType.cWaterLoss, ProductType.cBaseLoss,
+                    ProductType.d, ProductType.dWaterLoss, ProductType.dBaseLoss,
+                    // every type from the ThreePrime list
+                    ProductType.w, ProductType.wWaterLoss, ProductType.wBaseLoss,
+                    ProductType.x, ProductType.xWaterLoss, ProductType.xBaseLoss,
+                    ProductType.y, ProductType.yWaterLoss, ProductType.yBaseLoss,
+                    ProductType.z, ProductType.zWaterLoss, ProductType.zBaseLoss,
+                    // M is listed under Both only
+                    ProductType.M
+                }
+            },
+            {
+                FragmentationTerminus.None, new List<ProductType>()
+            }
+        };
+
+        /// <summary>
+        /// Gets the terminus of the oligo that the given product type is generated from
+        /// </summary>
+        /// <param name="fragmentType">Product type to look up in <see cref="ProductTypeToFragmentationTerminus"/></param>
+        /// <returns>FivePrime for a/b/c/d type ions, ThreePrime for w/x/y/z type ions, Both for M</returns>
+        /// <exception cref="ArgumentOutOfRangeException">
+        /// The product type has no entry in the table (e.g. aStar, aDegree, bAmmoniaLoss, yAmmoniaLoss,
+        /// zPlusOne, D, Ycore, Y)
+        /// </exception>
+        public static FragmentationTerminus GetRnaTerminusType(this ProductType fragmentType)
+        {
+            // The table is the single source of truth: every supported type has an entry, so a failed
+            // lookup is exactly the unsupported case and there is no second list of types to keep in sync
+            if (ProductTypeToFragmentationTerminus.TryGetValue(fragmentType, out FragmentationTerminus terminus))
+            {
+                return terminus;
+            }
+
+            throw new ArgumentOutOfRangeException(nameof(fragmentType), fragmentType, null);
+        }
+
+        /// <summary>
+        /// The terminus of the oligo fragment that the product ion is generated from
+        /// </summary>
+        /// <remarks>
+        /// Declared readonly so the table cannot be replaced by another dictionary instance.
+        /// Product types without an entry are rejected by <see cref="GetRnaTerminusType"/>
+        /// </remarks>
+        public static readonly Dictionary<ProductType, FragmentationTerminus> ProductTypeToFragmentationTerminus = new Dictionary<ProductType, FragmentationTerminus>
+        {
+            // a/b/c/d series, including water-loss and base-loss variants
+            { ProductType.a, FragmentationTerminus.FivePrime },
+            { ProductType.aWaterLoss, FragmentationTerminus.FivePrime },
+            { ProductType.aBaseLoss, FragmentationTerminus.FivePrime },
+            { ProductType.b, FragmentationTerminus.FivePrime },
+            { ProductType.bWaterLoss, FragmentationTerminus.FivePrime },
+            { ProductType.bBaseLoss, FragmentationTerminus.FivePrime },
+            { ProductType.c, FragmentationTerminus.FivePrime },
+            { ProductType.cWaterLoss, FragmentationTerminus.FivePrime },
+            { ProductType.cBaseLoss, FragmentationTerminus.FivePrime },
+            { ProductType.d, FragmentationTerminus.FivePrime },
+            { ProductType.dWaterLoss, FragmentationTerminus.FivePrime },
+            { ProductType.dBaseLoss, FragmentationTerminus.FivePrime },
+
+            // w/x/y/z series, including water-loss and base-loss variants
+            { ProductType.w, FragmentationTerminus.ThreePrime },
+            { ProductType.wWaterLoss, FragmentationTerminus.ThreePrime },
+            { ProductType.wBaseLoss, FragmentationTerminus.ThreePrime },
+            { ProductType.x, FragmentationTerminus.ThreePrime },
+            { ProductType.xWaterLoss, FragmentationTerminus.ThreePrime },
+            { ProductType.xBaseLoss, FragmentationTerminus.ThreePrime },
+            { ProductType.y, FragmentationTerminus.ThreePrime },
+            { ProductType.yWaterLoss, FragmentationTerminus.ThreePrime },
+            { ProductType.yBaseLoss, FragmentationTerminus.ThreePrime },
+            { ProductType.z, FragmentationTerminus.ThreePrime },
+            { ProductType.zWaterLoss, FragmentationTerminus.ThreePrime },
+            { ProductType.zBaseLoss, FragmentationTerminus.ThreePrime },
+
+            // M maps to Both, the only list that contains it in ProductIonTypesFromSpecifiedTerminus
+            { ProductType.M, FragmentationTerminus.Both }
+        };
+    }
+}
diff --git a/mzLib/Test/Transcriptomics/TestProductType.cs b/mzLib/Test/Transcriptomics/TestProductType.cs
index f9c459211..15757f4d2 100644
--- a/mzLib/Test/Transcriptomics/TestProductType.cs
+++ b/mzLib/Test/Transcriptomics/TestProductType.cs
@@ -228,7 +228,7 @@ public void TestProductTypes_GetRnaTerminusType()
                         break;
 
                     case ProductType.M:
-                        Assert.That(type.GetRnaTerminusType(), Is.EqualTo(FragmentationTerminus.None));
+                        Assert.That(type.GetRnaTerminusType(), Is.EqualTo(FragmentationTerminus.Both));
                         break;
 
                     case ProductType.aStar:
diff --git a/mzLib/Transcriptomics/ClassExtensions.cs b/mzLib/Transcriptomics/ClassExtensions.cs
index a5617863e..4a6a52962 100644
--- a/mzLib/Transcriptomics/ClassExtensions.cs
+++ b/mzLib/Transcriptomics/ClassExtensions.cs
@@ -1,9 +1,5 @@
 ﻿using Omics.Modifications;
-using System;
-using System.Collections.Generic;
-using System.Linq;
 using System.Text;
-using System.Threading.Tasks;
 using Transcriptomics.Digestion;
 
 namespace Transcriptomics
diff --git a/mzLib/Transcriptomics/Interfaces/INucleicAcid.cs b/mzLib/Transcriptomics/Interfaces/INucleicAcid.cs
index d2052aee3..4e3e95e4d 100644
--- a/mzLib/Transcriptomics/Interfaces/INucleicAcid.cs
+++ b/mzLib/Transcriptomics/Interfaces/INucleicAcid.cs
@@ -1,5 +1,4 @@
 ﻿using Chemistry;
-using Omics;
 using Omics.Modifications;
 
 namespace Transcriptomics
diff --git a/mzLib/Transcriptomics/NucleicAcid.cs b/mzLib/Transcriptomics/NucleicAcid.cs
index db6f18f43..c99934f75 100644
--- a/mzLib/Transcriptomics/NucleicAcid.cs
+++ b/mzLib/Transcriptomics/NucleicAcid.cs
@@ -2,11 +2,7 @@
 using Omics.Digestion;
 using Omics.Modifications;
 using Omics;
-using System;
-using System.Collections.Generic;
-using System.Linq;
 using System.Text;
-using System.Threading.Tasks;
 using Transcriptomics.Digestion;
 
 namespace Transcriptomics
diff --git a/mzLib/Transcriptomics/RNA.cs b/mzLib/Transcriptomics/RNA.cs
index 3e72c1f14..af6798cea 100644
--- a/mzLib/Transcriptomics/RNA.cs
+++ b/mzLib/Transcriptomics/RNA.cs
@@ -1,10 +1,5 @@
 ﻿using Chemistry;
 using Omics.Modifications;
-using System;
-using System.Collections.Generic;
-using System.Linq;
-using System.Text;
-using System.Threading.Tasks;
 
 namespace Transcriptomics
 {

From bafa5179f4de022f64d380674d1035985d0adefa Mon Sep 17 00:00:00 2001
From: Nic Bollis <nbollis@comcast.net>
Date: Fri, 27 Sep 2024 09:51:04 -0500
Subject: [PATCH 12/17] Update ProteinXmlEntry.cs

---
 mzLib/UsefulProteomicsDatabases/ProteinXmlEntry.cs | 13 -------------
 1 file changed, 13 deletions(-)

diff --git a/mzLib/UsefulProteomicsDatabases/ProteinXmlEntry.cs b/mzLib/UsefulProteomicsDatabases/ProteinXmlEntry.cs
index 698a1c51c..3e9bb5a34 100644
--- a/mzLib/UsefulProteomicsDatabases/ProteinXmlEntry.cs
+++ b/mzLib/UsefulProteomicsDatabases/ProteinXmlEntry.cs
@@ -406,19 +406,6 @@ private static void ParseAnnotatedMods(Dictionary<int, List<Modification>> desti
             }
         }
 
-        private static ModificationMotif GetMotif(string proteinSequence, int position)
-        {
-            string aminoAcid = proteinSequence.Substring(position - 1, 1);
-            if (ModificationMotif.TryGetMotif(aminoAcid, out ModificationMotif motif))
-            {
-                return motif;
-            }
-            else
-            {
-                return null;
-            }
-        }
-
         /// <summary>
         /// Finish parsing a database reference element
         /// </summary>

From 0bbad42065826bd239794a9d6580d5a9e7dc51a8 Mon Sep 17 00:00:00 2001
From: Nic Bollis <nbollis@wisc.edu>
Date: Fri, 27 Sep 2024 10:19:52 -0500
Subject: [PATCH 13/17] Added gene name to RNA constructor

---
 mzLib/Transcriptomics/NucleicAcid.cs               | 3 ++-
 mzLib/Transcriptomics/RNA.cs                       | 4 ++--
 mzLib/UsefulProteomicsDatabases/ProteinXmlEntry.cs | 2 +-
 3 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/mzLib/Transcriptomics/NucleicAcid.cs b/mzLib/Transcriptomics/NucleicAcid.cs
index c99934f75..2cc9de6c0 100644
--- a/mzLib/Transcriptomics/NucleicAcid.cs
+++ b/mzLib/Transcriptomics/NucleicAcid.cs
@@ -60,7 +60,7 @@ protected NucleicAcid(string sequence, IHasChemicalFormula? fivePrimeTerm = null
         protected NucleicAcid(string sequence, string name, string identifier, string organism, string databaseFilePath,
             IHasChemicalFormula? fivePrimeTerm = null, IHasChemicalFormula? threePrimeTerm = null,
             IDictionary<int, List<Modification>>? oneBasedPossibleLocalizedModifications = null,
-            bool isContaminant = false, bool isDecoy = false,
+            bool isContaminant = false, bool isDecoy = false, List<Tuple<string, string>>? geneNames = null,
             Dictionary<string, string>? additionalDatabaseFields = null)
             : this(sequence, fivePrimeTerm, threePrimeTerm, oneBasedPossibleLocalizedModifications)
         {
@@ -71,6 +71,7 @@ protected NucleicAcid(string sequence, string name, string identifier, string or
             Organism = organism;
             Accession = identifier;
             AdditionalDatabaseFields = additionalDatabaseFields;
+            GeneNames = geneNames ?? new List<Tuple<string, string>>();
         }
 
         #endregion
diff --git a/mzLib/Transcriptomics/RNA.cs b/mzLib/Transcriptomics/RNA.cs
index af6798cea..5d5fcb2f6 100644
--- a/mzLib/Transcriptomics/RNA.cs
+++ b/mzLib/Transcriptomics/RNA.cs
@@ -35,10 +35,10 @@ public RNA(string sequence, IHasChemicalFormula? fivePrimeTerm = null, IHasChemi
         public RNA(string sequence, string name, string identifier, string organism, string databaseFilePath,
             IHasChemicalFormula? fivePrimeTerminus = null, IHasChemicalFormula? threePrimeTerminus = null,
             IDictionary<int, List<Modification>>? oneBasedPossibleModifications = null,
-            bool isContaminant = false, bool isDecoy = false,
+            bool isContaminant = false, bool isDecoy = false, List<Tuple<string, string>> geneNames = null,
             Dictionary<string, string>? databaseAdditionalFields = null)
             : base(sequence, name, identifier, organism, databaseFilePath, fivePrimeTerminus, threePrimeTerminus,
-                oneBasedPossibleModifications, isContaminant, isDecoy, databaseAdditionalFields)
+                oneBasedPossibleModifications, isContaminant, isDecoy, geneNames, databaseAdditionalFields)
         {
 
         }
diff --git a/mzLib/UsefulProteomicsDatabases/ProteinXmlEntry.cs b/mzLib/UsefulProteomicsDatabases/ProteinXmlEntry.cs
index 3e9bb5a34..becfa2cfa 100644
--- a/mzLib/UsefulProteomicsDatabases/ProteinXmlEntry.cs
+++ b/mzLib/UsefulProteomicsDatabases/ProteinXmlEntry.cs
@@ -248,7 +248,7 @@ internal RNA ParseRnaEntryEndElement(XmlReader xml, bool isContaminant, string r
 
                 ParseAnnotatedMods(OneBasedModifications, modTypesToExclude, unknownModifications, AnnotatedMods);
                 result = new RNA(Sequence, Name, Accession, Organism, rnaDbLocation, null,
-                    null, OneBasedModifications, isContaminant, false, null);
+                    null, OneBasedModifications, isContaminant, false, GeneNames, null);
             }
             Clear();
             return result;

From 5e852a1ea492f73702768820036f102168a4f603 Mon Sep 17 00:00:00 2001
From: Nic Bollis <nbollis@wisc.edu>
Date: Fri, 27 Sep 2024 10:19:52 -0500
Subject: [PATCH 14/17] Added gene name to RNA constructor

---
 mzLib/Test/Transcriptomics/TestDecoyGenerator.cs              | 2 +-
 mzLib/Transcriptomics/ClassExtensions.cs                      | 3 ++-
 mzLib/Transcriptomics/NucleicAcid.cs                          | 3 ++-
 mzLib/Transcriptomics/RNA.cs                                  | 4 ++--
 mzLib/UsefulProteomicsDatabases/ProteinXmlEntry.cs            | 2 +-
 .../UsefulProteomicsDatabases/Transcriptomics/RnaDbLoader.cs  | 2 +-
 6 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/mzLib/Test/Transcriptomics/TestDecoyGenerator.cs b/mzLib/Test/Transcriptomics/TestDecoyGenerator.cs
index 800126b1a..1b81a0d5a 100644
--- a/mzLib/Test/Transcriptomics/TestDecoyGenerator.cs
+++ b/mzLib/Test/Transcriptomics/TestDecoyGenerator.cs
@@ -240,7 +240,7 @@ public void TestCreateNew()
             };
 
             var rna = new RNA("GAACUG", "name", "accession", "organism", "databaseFilePath",
-                null, null, oneBasedPossibleLocalizedModifications, false, false,
+                null, null, oneBasedPossibleLocalizedModifications, false, false, new List<Tuple<string, string>>(),
                 new Dictionary<string, string>());
             var oligos = rna
                 .Digest(new RnaDigestionParams(maxMods: 1), new List<Modification>(), mods)
diff --git a/mzLib/Transcriptomics/ClassExtensions.cs b/mzLib/Transcriptomics/ClassExtensions.cs
index 4a6a52962..ef56c737d 100644
--- a/mzLib/Transcriptomics/ClassExtensions.cs
+++ b/mzLib/Transcriptomics/ClassExtensions.cs
@@ -35,7 +35,7 @@ public static T CreateNew<T>(this T target, string? sequence = null, IDictionary
                 {
                     bool newIsDecoy = isDecoy ?? rna.IsDecoy;
                     returnObj = new RNA(newSequence, rna.Name, rna.Accession, rna.Organism, rna.DatabaseFilePath,
-                        rna.FivePrimeTerminus, rna.ThreePrimeTerminus, newModifications, rna.IsContaminant, newIsDecoy, rna.AdditionalDatabaseFields);
+                        rna.FivePrimeTerminus, rna.ThreePrimeTerminus, newModifications, rna.IsContaminant, newIsDecoy, rna.GeneNames.ToList(), rna.AdditionalDatabaseFields);
                     break;
                 }
                 case OligoWithSetMods oligo:
@@ -53,6 +53,7 @@ public static T CreateNew<T>(this T target, string? sequence = null, IDictionary
                         newModifications,
                         oldParent.IsContaminant,
                         newIsDecoy,
+                        oldParent.GeneNames.ToList(),
                         oldParent.AdditionalDatabaseFields);
 
                     returnObj = new OligoWithSetMods(
diff --git a/mzLib/Transcriptomics/NucleicAcid.cs b/mzLib/Transcriptomics/NucleicAcid.cs
index c99934f75..2cc9de6c0 100644
--- a/mzLib/Transcriptomics/NucleicAcid.cs
+++ b/mzLib/Transcriptomics/NucleicAcid.cs
@@ -60,7 +60,7 @@ protected NucleicAcid(string sequence, IHasChemicalFormula? fivePrimeTerm = null
         protected NucleicAcid(string sequence, string name, string identifier, string organism, string databaseFilePath,
             IHasChemicalFormula? fivePrimeTerm = null, IHasChemicalFormula? threePrimeTerm = null,
             IDictionary<int, List<Modification>>? oneBasedPossibleLocalizedModifications = null,
-            bool isContaminant = false, bool isDecoy = false,
+            bool isContaminant = false, bool isDecoy = false, List<Tuple<string, string>>? geneNames = null,
             Dictionary<string, string>? additionalDatabaseFields = null)
             : this(sequence, fivePrimeTerm, threePrimeTerm, oneBasedPossibleLocalizedModifications)
         {
@@ -71,6 +71,7 @@ protected NucleicAcid(string sequence, string name, string identifier, string or
             Organism = organism;
             Accession = identifier;
             AdditionalDatabaseFields = additionalDatabaseFields;
+            GeneNames = geneNames ?? new List<Tuple<string, string>>();
         }
 
         #endregion
diff --git a/mzLib/Transcriptomics/RNA.cs b/mzLib/Transcriptomics/RNA.cs
index af6798cea..5d5fcb2f6 100644
--- a/mzLib/Transcriptomics/RNA.cs
+++ b/mzLib/Transcriptomics/RNA.cs
@@ -35,10 +35,10 @@ public RNA(string sequence, IHasChemicalFormula? fivePrimeTerm = null, IHasChemi
         public RNA(string sequence, string name, string identifier, string organism, string databaseFilePath,
             IHasChemicalFormula? fivePrimeTerminus = null, IHasChemicalFormula? threePrimeTerminus = null,
             IDictionary<int, List<Modification>>? oneBasedPossibleModifications = null,
-            bool isContaminant = false, bool isDecoy = false,
+            bool isContaminant = false, bool isDecoy = false, List<Tuple<string, string>>? geneNames = null,
             Dictionary<string, string>? databaseAdditionalFields = null)
             : base(sequence, name, identifier, organism, databaseFilePath, fivePrimeTerminus, threePrimeTerminus,
-                oneBasedPossibleModifications, isContaminant, isDecoy, databaseAdditionalFields)
+                oneBasedPossibleModifications, isContaminant, isDecoy, geneNames, databaseAdditionalFields)
         {
 
         }
diff --git a/mzLib/UsefulProteomicsDatabases/ProteinXmlEntry.cs b/mzLib/UsefulProteomicsDatabases/ProteinXmlEntry.cs
index 3e9bb5a34..becfa2cfa 100644
--- a/mzLib/UsefulProteomicsDatabases/ProteinXmlEntry.cs
+++ b/mzLib/UsefulProteomicsDatabases/ProteinXmlEntry.cs
@@ -248,7 +248,7 @@ internal RNA ParseRnaEntryEndElement(XmlReader xml, bool isContaminant, string r
 
                 ParseAnnotatedMods(OneBasedModifications, modTypesToExclude, unknownModifications, AnnotatedMods);
                 result = new RNA(Sequence, Name, Accession, Organism, rnaDbLocation, null,
-                    null, OneBasedModifications, isContaminant, false, null);
+                    null, OneBasedModifications, isContaminant, false, GeneNames, null);
             }
             Clear();
             return result;
diff --git a/mzLib/UsefulProteomicsDatabases/Transcriptomics/RnaDbLoader.cs b/mzLib/UsefulProteomicsDatabases/Transcriptomics/RnaDbLoader.cs
index 13aa82fb2..2e80c090c 100644
--- a/mzLib/UsefulProteomicsDatabases/Transcriptomics/RnaDbLoader.cs
+++ b/mzLib/UsefulProteomicsDatabases/Transcriptomics/RnaDbLoader.cs
@@ -144,7 +144,7 @@ public static List<RNA> LoadRnaFasta(string rnaDbLocation, bool generateTargets,
 
                         RNA rna = new RNA(sequence, name, identifier, organism, rnaDbLocation,
                             fivePrimeTerm, threePrimeTerm, null,
-                            isContaminant, false, additonalDatabaseFields);
+                            isContaminant, false, null, additonalDatabaseFields);
                         if (rna.Length == 0)
                             errors.Add("Line" + line + ", Rna length of 0: " + rna.Name + "was skipped from database: " + rnaDbLocation);
                         else

From 501ef559ac683764732db7be5ee9616c209fc20d Mon Sep 17 00:00:00 2001
From: nbollis <nbollis@comcast.net>
Date: Tue, 1 Oct 2024 18:34:33 -0500
Subject: [PATCH 15/17] Refactor and enhance exception handling and tests

Refactored constructors, improved exception handling, and added comprehensive tests across multiple files. Key changes include:

- `MzLibException.cs`: Updated constructor to include `innerException`.
- `TestDecoyGenerator.cs`: Added assertions for `CreateNew` method.
- `TestDigestion.cs`: Added assertions and new test for RNA digestion exception.
- Refactored modification lists and added various tests for modifications.
- `TestNucleicAcid.cs`: Refactored methods, adjusted precision, and updated terminus assignments.
- `NucleolyticOligo.cs`: Changed parameter types, updated comments, and improved variable names.
- `OligoWithSetMods.cs`: Enhanced exception messages and updated modification location checks.
- `NucleicAcid.cs`: Added `using` directive, changed exception type, and refactored methods.
- `mzLib.sln.DotSettings`: Updated user dictionary entries.
---
 mzLib/MzLibUtil/MzLibException.cs             |   9 +-
 .../Transcriptomics/TestDecoyGenerator.cs     |  17 +-
 mzLib/Test/Transcriptomics/TestDigestion.cs   | 418 ++++++++++++++++--
 mzLib/Test/Transcriptomics/TestNucleicAcid.cs |  21 +-
 .../Digestion/NucleolyticOligo.cs             |  26 +-
 .../Digestion/OligoWithSetMods.cs             |   4 +-
 mzLib/Transcriptomics/NucleicAcid.cs          |  14 +-
 mzLib/mzLib.sln.DotSettings                   |   1 +
 8 files changed, 443 insertions(+), 67 deletions(-)

diff --git a/mzLib/MzLibUtil/MzLibException.cs b/mzLib/MzLibUtil/MzLibException.cs
index cf86074d8..61ecc8d6b 100644
--- a/mzLib/MzLibUtil/MzLibException.cs
+++ b/mzLib/MzLibUtil/MzLibException.cs
@@ -3,11 +3,6 @@
 namespace MzLibUtil
 {
     [Serializable]
-    public class MzLibException : Exception
-    {
-        public MzLibException(string message)
-            : base(message)
-        {
-        }
-    }
+    public class MzLibException(string message, Exception innerException = null) // innerException is optional and defaults to null
+        : Exception(message, innerException);
 }
\ No newline at end of file
diff --git a/mzLib/Test/Transcriptomics/TestDecoyGenerator.cs b/mzLib/Test/Transcriptomics/TestDecoyGenerator.cs
index 800126b1a..8a098e45f 100644
--- a/mzLib/Test/Transcriptomics/TestDecoyGenerator.cs
+++ b/mzLib/Test/Transcriptomics/TestDecoyGenerator.cs
@@ -249,7 +249,6 @@ public void TestCreateNew()
             var clonedRna = rna.CreateNew(null, null, true);
             var clonedOligo =  oligos.First().CreateNew(null, null, true);
 
-            // ensure they are identical except for the isDecoy field
             // ensure they are identical except for the isDecoy field
             Assert.That(rna.BaseSequence, Is.EqualTo(clonedRna.BaseSequence));
             Assert.That(rna.OneBasedPossibleLocalizedModifications, Is.EqualTo(clonedRna.OneBasedPossibleLocalizedModifications));
@@ -260,6 +259,22 @@ public void TestCreateNew()
             Assert.That(oligos.First().Parent.IsDecoy, Is.Not.EqualTo(clonedOligo.Parent.IsDecoy));
 
 
+            var newMods = new Dictionary<int, List<Modification>>()
+            {
+                { 1, new List<Modification>() { modDict["Sodium on A"] } },
+                { 2, new List<Modification>() { modDict["Sodium on A"] } },
+                { 3, new List<Modification>() { modDict["Sodium on A"] } },
+            };
+            clonedRna = rna.CreateNew("AAAAAA", newMods, null);
+            clonedOligo = oligos.First().CreateNew("AAAAAA", newMods, null);
+
+            Assert.That(rna.BaseSequence, Is.Not.EqualTo(clonedRna.BaseSequence));
+            Assert.That(rna.OneBasedPossibleLocalizedModifications, Is.Not.EqualTo(clonedRna.OneBasedPossibleLocalizedModifications));
+            Assert.That(rna.IsDecoy, Is.EqualTo(clonedRna.IsDecoy));
+
+            Assert.That(oligos.First().BaseSequence, Is.Not.EqualTo(clonedOligo.BaseSequence));
+            Assert.That(oligos.First().OneBasedPossibleLocalizedModifications, Is.Not.EqualTo(clonedOligo.OneBasedPossibleLocalizedModifications));
+            Assert.That(oligos.First().Parent.IsDecoy, Is.EqualTo(clonedOligo.Parent.IsDecoy));
         }
     }
 }
diff --git a/mzLib/Test/Transcriptomics/TestDigestion.cs b/mzLib/Test/Transcriptomics/TestDigestion.cs
index 37abc0447..a46017c0e 100644
--- a/mzLib/Test/Transcriptomics/TestDigestion.cs
+++ b/mzLib/Test/Transcriptomics/TestDigestion.cs
@@ -512,6 +512,7 @@ public static void OligoWithSetMods_CalculatedValues()
             Assert.That(oligoWithSetMods.NumMods, Is.EqualTo(1));
             Assert.That(oligoWithSetMods.NumFixedMods, Is.EqualTo(1));
             Assert.That(oligoWithSetMods.NumVariableMods, Is.EqualTo(0));
+            Assert.That(oligoWithSetMods.CleavageSpecificityForFdrCategory, Is.EqualTo(CleavageSpecificity.Full));
 
             var formula = oligoWithSetMods.ThisChemicalFormula;
             Assert.That(formula, Is.EqualTo(rnaFormula + sodiumAdduct.ChemicalFormula));
@@ -580,6 +581,7 @@ public void TestDigestionParamsClone()
             Assert.That(digestionParams.MaxLength, Is.EqualTo(cloned.MaxLength));
             Assert.That(digestionParams.MaxMods, Is.EqualTo(cloned.MaxMods));
             Assert.That(digestionParams.FragmentationTerminus, Is.Not.EqualTo(cloned.FragmentationTerminus));
+            Assert.That(digestionParams.SearchModeType, Is.EqualTo(CleavageSpecificity.Full));
             Assert.That(cloned.FragmentationTerminus, Is.EqualTo(FragmentationTerminus.C));
 
             // do not set new terminus, all values are retained
@@ -590,6 +592,7 @@ public void TestDigestionParamsClone()
             Assert.That(digestionParams.MaxLength, Is.EqualTo(cloned.MaxLength));
             Assert.That(digestionParams.MaxMods, Is.EqualTo(cloned.MaxMods));
             Assert.That(digestionParams.FragmentationTerminus, Is.EqualTo(cloned.FragmentationTerminus));
+            Assert.That(digestionParams.SearchModeType, Is.EqualTo(CleavageSpecificity.Full));
             Assert.That(cloned.FragmentationTerminus, Is.EqualTo(FragmentationTerminus.Both));
         }
 
@@ -651,25 +654,53 @@ public void TestNucleicAcid_Digestion_WithoutMods_MonoMasses(RnaDigestionTestCas
             }
         }
 
+        [Test]
+        public static void TestNucleicAcid_Digestion_Exception()
+        {
+            // Protein digestion parameters are deliberately handed to an RNA digestion.
+            IDigestionParams digestionParams = new Proteomics.ProteolyticDigestion.DigestionParams();
+            var rna = new RNA("GUACUGGUACUG");
+
+            // Assert.Throws fails the test when nothing is thrown (a bare try/catch would pass silently);
+            // ToList() forces enumeration in case Digest evaluates lazily.
+            var exception = Assert.Throws<MzLibException>(() =>
+                rna.Digest(digestionParams, new List<Modification>(), new List<Modification>()).ToList());
+
+            // The wrapped cause is expected to be an ArgumentException.
+            Assert.That(exception, Is.Not.Null);
+            Assert.That(exception.InnerException, Is.TypeOf<ArgumentException>());
+        }
+
         #endregion
 
         #region Digestion with Modifications
 
+        /// <summary>Sodium (Na1H-1) adducts, "Anywhere." on A/C/G/U; tests index them [0]=A, [1]=C, [2]=G, [3]=U. Re-parsed on each access.</summary>
+        public static List<Modification> SodiumAdducts => PtmListLoader.ReadModsFromString(
+            "ID   Sodium\r\nMT   Metal\r\nPP   Anywhere.\r\nTG   A or C or G or U\r\nCF   Na1H-1\r\n" + @"//", out List<(Modification, string)> _).ToList();
+
+        /// <summary>Potassium (K1H-1) adducts, "Anywhere." on A/C/G/U; tests index them [0]=A, [1]=C, [2]=G. Re-parsed on each access.</summary>
+        public static List<Modification> PotassiumAdducts => PtmListLoader.ReadModsFromString(
+            "ID   Potassium\r\nMT   Metal\r\nPP   Anywhere.\r\nTG   A or C or G or U\r\nCF   K1H-1\r\n" + @"//", out List<(Modification, string)> _).ToList();
+
+        /// <summary>Sodium (Na1H-1) adducts restricted to "3'-terminal." on A/C/G/U. Re-parsed on each access.</summary>
+        public static List<Modification> TerminalSodiumAdducts => PtmListLoader.ReadModsFromString(
+            "ID   Sodium\r\nMT   Metal\r\nPP   3'-terminal.\r\nTG   A or C or G or U\r\nCF   Na1H-1\r\n" + @"//", out List<(Modification, string)> _).ToList();
+
+        /// <summary>Potassium (K1H-1) adducts restricted to "5'-terminal." on A/C/G/U. Re-parsed on each access.</summary>
+        public static List<Modification> TerminalPotassiumAdducts => PtmListLoader.ReadModsFromString(
+            "ID   Potassium\r\nMT   Metal\r\nPP   5'-terminal.\r\nTG   A or C or G or U\r\nCF   K1H-1\r\n" + @"//", out List<(Modification, string)> _).ToList();
+
         [Test]
         public static void TestVariableModsCountCorrect()
         {
-            string modText = "ID   Sodium\r\nMT   Metal\r\nPP   Anywhere.\r\nTG   A or C or G or U\r\nCF   Na1H-1\r\n" + @"//";
-            var sodiumAdducts = PtmListLoader.ReadModsFromString(modText, out List<(Modification, string)> mods)
-                .ToList();
-            Assert.That(sodiumAdducts.Count, Is.EqualTo(4));
-
             var rna = new RNA("GUACUG");
             var rnaDigestionParams = new RnaDigestionParams()
             {
                 MaxMods = 1,
             };
 
-            var precursors = rna.Digest(rnaDigestionParams, new List<Modification>(), sodiumAdducts)
+            var precursors = rna.Digest(rnaDigestionParams, new List<Modification>(), SodiumAdducts)
                 .ToList();
             Assert.That(precursors.Count, Is.EqualTo(7));
             var fullSequences = precursors.Select(p => p.FullSequence).ToList();
@@ -682,7 +713,7 @@ public static void TestVariableModsCountCorrect()
             Assert.That(fullSequences.Contains("GUACUG[Metal:Sodium on G]"));
 
             rnaDigestionParams.MaxMods = 2;
-            precursors = rna.Digest(rnaDigestionParams, new List<Modification>(), sodiumAdducts)
+            precursors = rna.Digest(rnaDigestionParams, new List<Modification>(), SodiumAdducts)
                 .ToList();
             Assert.That(precursors.Count, Is.EqualTo(22));
             fullSequences = precursors.Select(p => p.FullSequence).ToList();
@@ -713,9 +744,7 @@ public static void TestVariableModsCountCorrect()
         [Test]
         public static void TestFixedModsCountCorrect()
         {
-            string modText = "ID   Sodium\r\nMT   Metal\r\nPP   Anywhere.\r\nTG   A\r\nCF   Na1H-1\r\n" + @"//";
-            var sodiumAdduct = PtmListLoader.ReadModsFromString(modText, out List<(Modification, string)> mods)
-                .ToList();
+            var sodiumAdduct = new List<Modification>() { SodiumAdducts[0] };
 
             var rna = new RNA("GUACUG");
             var rnaDigestionParams = new RnaDigestionParams()
@@ -729,9 +758,7 @@ public static void TestFixedModsCountCorrect()
             Assert.That(precursors.First().FullSequence, Is.EqualTo("GUA[Metal:Sodium on A]CUG"));
             Assert.That(precursors.First().MonoisotopicMass, Is.EqualTo(1896.26).Within(0.01));
 
-            modText = "ID   Sodium\r\nMT   Metal\r\nPP   Anywhere.\r\nTG   G\r\nCF   Na1H-1\r\n" + @"//";
-            sodiumAdduct = PtmListLoader.ReadModsFromString(modText, out mods)
-                .ToList();
+            sodiumAdduct = new List<Modification>() { SodiumAdducts[2] };
 
             precursors = rna.Digest(rnaDigestionParams, sodiumAdduct, new List<Modification>())
                 .ToList();
@@ -744,22 +771,12 @@ public static void TestFixedModsCountCorrect()
         [Test]
         public static void TestFixedAndVariableMods()
         {
-            string modText = "ID   Sodium\r\nMT   Metal\r\nPP   Anywhere.\r\nTG   A or C or G or U\r\nCF   Na1H-1\r\n" + @"//";
-            string modText2 = "ID   Potassium\r\nMT   Metal\r\nPP   Anywhere.\r\nTG   A or C or G or U\r\nCF   K1H-1\r\n" + @"//";
-            var sodiumAdducts = PtmListLoader.ReadModsFromString(modText, out List<(Modification, string)> mods)
-                .ToList();
-            var potassiumAdducts = PtmListLoader.ReadModsFromString(modText2, out mods)
-                .ToList();
-
-            Assert.That(sodiumAdducts.Count, Is.EqualTo(4));
-            Assert.That(potassiumAdducts.Count, Is.EqualTo(4));
-
             var rna = new RNA("GUACUG");
             var rnaDigestionParams = new RnaDigestionParams();
 
             rnaDigestionParams.MaxMods = 1;
-            var fixedMods = new List<Modification> { potassiumAdducts[0] }; // A
-            var variableMods = new List<Modification> { sodiumAdducts[1] }; // C
+            var fixedMods = new List<Modification> { PotassiumAdducts[0] }; // A
+            var variableMods = new List<Modification> { SodiumAdducts[1] }; // C
             var precursors = rna.Digest(rnaDigestionParams, fixedMods, variableMods)
                 .ToList();
 
@@ -774,8 +791,8 @@ public static void TestFixedAndVariableMods()
             Assert.That(oneOfEach.NumVariableMods, Is.EqualTo(1));
             Assert.That(oneOfEach.NumMods, Is.EqualTo(2));
 
-            fixedMods = new List<Modification> { potassiumAdducts[2] }; // G
-            variableMods = new List<Modification> { sodiumAdducts[1] }; // C
+            fixedMods = new List<Modification> { PotassiumAdducts[2] }; // G
+            variableMods = new List<Modification> { SodiumAdducts[1] }; // C
             precursors = rna.Digest(rnaDigestionParams, fixedMods, variableMods)
                 .ToList();
             fullSequences = precursors.Select(p => p.FullSequence).ToList();
@@ -784,8 +801,8 @@ public static void TestFixedAndVariableMods()
             Assert.That(fullSequences.Contains("G[Metal:Potassium on G]UACUG[Metal:Potassium on G]"));
             Assert.That(fullSequences.Contains("G[Metal:Potassium on G]UAC[Metal:Sodium on C]UG[Metal:Potassium on G]"));
 
-            fixedMods = new List<Modification> { potassiumAdducts[2] }; // G
-            variableMods = new List<Modification> { sodiumAdducts[1], sodiumAdducts[3] }; // C, U
+            fixedMods = new List<Modification> { PotassiumAdducts[2] }; // G
+            variableMods = new List<Modification> { SodiumAdducts[1], SodiumAdducts[3] }; // C, U
             precursors = rna.Digest(rnaDigestionParams, fixedMods, variableMods)
                 .ToList();
             fullSequences = precursors.Select(p => p.FullSequence).ToList();
@@ -811,6 +828,349 @@ public static void TestFixedAndVariableMods()
             Assert.That(fullSequences.Contains("G[Metal:Potassium on G]U[Metal:Sodium on U]AC[Metal:Sodium on C]UG[Metal:Potassium on G]"));
         }
 
+        /// <summary>
+        /// Test when one fixed and one variable mod are used and share a localization (the single C in GUACUG).
+        /// Expect two results, one with the fixed mod and one with the variable mod, each carrying exactly one mod.
+        /// </summary>
+        [Test]
+        public static void TestFixedAndVariableMods_LocalizationOverlap()
+        {
+            var rna = new RNA("GUACUG");
+            var rnaDigestionParams = new RnaDigestionParams();
+
+            for (int i = 1; i < 3; i++) // i is the MaxMods setting: 1, then 2
+            {
+                rnaDigestionParams.MaxMods = i;
+                var fixedMods = new List<Modification> { PotassiumAdducts[1] }; // C
+                var variableMods = new List<Modification> { SodiumAdducts[1] }; // C
+                var precursors = rna.Digest(rnaDigestionParams, fixedMods, variableMods)
+                    .ToList();
+
+                var fullSequences = precursors.Select(p => p.FullSequence).ToList();
+                Assert.That(precursors.Count, Is.EqualTo(2));
+                Assert.That(precursors.Any(p => p.NumFixedMods == 1));
+                Assert.That(precursors.Any(p => p.NumVariableMods == 1));
+                Assert.That(precursors.Any(p => p.NumFixedMods == 0));
+                Assert.That(precursors.Any(p => p.NumVariableMods == 0));
+                Assert.That(precursors.All(p => p.NumMods == 1));
+                Assert.That(fullSequences.Contains("GUAC[Metal:Potassium on C]UG"));
+                Assert.That(fullSequences.Contains("GUAC[Metal:Sodium on C]UG"));
+            }
+        }
+
+        /// <summary>
+        /// Test when two variable mods are used and share a localization (the single C in GUACUG).
+        /// Expect three results for both MaxMods 1 and 2: one unmodified, and two singly modified.
+        /// </summary>
+        [Test]
+        public static void TestVariableMods_LocalizationOverlap()
+        {
+            var rna = new RNA("GUACUG");
+            var rnaDigestionParams = new RnaDigestionParams();
+
+            for (int i = 1; i < 3; i++) // i is the MaxMods setting: 1, then 2
+            {
+                rnaDigestionParams.MaxMods = i;
+                var fixedMods = new List<Modification> { }; // no fixed mods
+                var variableMods = new List<Modification> { PotassiumAdducts[1], SodiumAdducts[1] }; // both on C
+                var precursors = rna.Digest(rnaDigestionParams, fixedMods, variableMods)
+                    .ToList();
+
+                // expect three results, one unmodified, and two singly modified
+                var fullSequences = precursors.Select(p => p.FullSequence).ToList();
+                Assert.That(precursors.Count, Is.EqualTo(3));
+                Assert.That(precursors.Any(p => p.NumFixedMods == 0));
+                Assert.That(precursors.Any(p => p.NumVariableMods == 1));
+                Assert.That(fullSequences.Contains("GUACUG"));
+                Assert.That(fullSequences.Contains("GUAC[Metal:Potassium on C]UG"));
+                Assert.That(fullSequences.Contains("GUAC[Metal:Sodium on C]UG"));
+            }
+        }
+
+        /// <summary>
+        /// Test when one modification is annotated in the database, out of bounds
+        /// expect one result, the unmodified oligo, with no fixed or variable mods
+        /// </summary>
+        [Test]
+        public static void TestDatabaseAnnotatedMods_OutOfBounds()
+        {
+            var rnaDigestionParams = new RnaDigestionParams();
+            var oneBasedModifications = new Dictionary<int, List<Modification>>()
+            {
+                { 23, new List<Modification>() { PotassiumAdducts[1] } }
+            };
+            var rna = new RNA("GUACUG", oneBasedPossibleLocalizedModifications: oneBasedModifications);
+            // position 23 lies beyond the six-residue sequence
+            for (int i = 1; i < 3; i++)
+            {
+                rnaDigestionParams.MaxMods = i;
+                var fixedMods = new List<Modification> {  }; // no fixed mods
+                var variableMods = new List<Modification> { }; // no variable mods
+                var precursors = rna.Digest(rnaDigestionParams, fixedMods, variableMods)
+                    .ToList();
+
+                var fullSequences = precursors.Select(p => p.FullSequence).ToList();
+                Assert.That(precursors.Count, Is.EqualTo(1));
+                Assert.That(precursors.All(p => p.NumFixedMods == 0));
+                Assert.That(precursors.All(p => p.NumVariableMods == 0));
+                Assert.That(precursors.All(p => p.NumMods == 0));
+                Assert.That(fullSequences.Contains("GUACUG"));
+            }
+        }
+
+        /// <summary>
+        /// Test when one modification is annotated in the database (Potassium on the C at position 4)
+        /// expect two results, one unmodified, and one singly modified
+        /// </summary>
+        [Test]
+        public static void TestDatabaseAnnotatedMods_SingleModification()
+        {
+            var rnaDigestionParams = new RnaDigestionParams();
+            var oneBasedModifications = new Dictionary<int, List<Modification>>()
+            {
+                { 4, new List<Modification>() { PotassiumAdducts[1] } }
+            };
+            var rna = new RNA("GUACUG", oneBasedPossibleLocalizedModifications: oneBasedModifications);
+
+            // the only candidate mod comes from the database annotation; no fixed or variable mods are supplied
+            for (int i = 1; i < 3; i++)
+            {
+                rnaDigestionParams.MaxMods = i;
+                var fixedMods = new List<Modification> { }; // no fixed mods
+                var variableMods = new List<Modification> { };
+                var precursors = rna.Digest(rnaDigestionParams, fixedMods, variableMods)
+                    .ToList();
+
+                // the index-based asserts below rely on the unmodified form being returned first
+                var fullSequences = precursors.Select(p => p.FullSequence).ToList();
+                Assert.That(precursors.Count, Is.EqualTo(2));
+                Assert.That(precursors[0].NumMods, Is.EqualTo(0));
+                Assert.That(precursors[1].NumMods, Is.EqualTo(1));
+                Assert.That(precursors[1].NumVariableMods, Is.EqualTo(1));
+                Assert.That(fullSequences.Contains("GUACUG"));
+                Assert.That(fullSequences.Contains("GUAC[Metal:Potassium on C]UG"));
+            }
+        }
+
+        /// <summary>
+        /// Test when two modifications are annotated in the database at the same location (position 4, the C)
+        /// expect three results for both MaxMods 1 and 2: one unmodified, and two singly modified
+        /// </summary>
+        [Test]
+        public static void TestDatabaseAnnotatedMods_LocalizationOverlap()
+        {
+            var rnaDigestionParams = new RnaDigestionParams();
+            var oneBasedModifications = new Dictionary<int, List<Modification>>()
+            {
+                { 4, new List<Modification>() { PotassiumAdducts[1], SodiumAdducts[1] } }
+            };
+            var rna = new RNA("GUACUG", oneBasedPossibleLocalizedModifications: oneBasedModifications);
+
+            for (int i = 1; i < 3; i++) // i is the MaxMods setting: 1, then 2
+            {
+                rnaDigestionParams.MaxMods = i;
+                var fixedMods = new List<Modification> { };
+                var variableMods = new List<Modification> { };
+                var precursors = rna.Digest(rnaDigestionParams, fixedMods, variableMods)
+                    .ToList();
+
+                var fullSequences = precursors.Select(p => p.FullSequence).ToList();
+                Assert.That(precursors.Count, Is.EqualTo(3));
+                Assert.That(precursors.Any(p => p.NumFixedMods == 0));
+                Assert.That(precursors.Any(p => p.NumVariableMods == 1));
+                Assert.That(fullSequences.Contains("GUACUG"));
+                Assert.That(fullSequences.Contains("GUAC[Metal:Potassium on C]UG"));
+                Assert.That(fullSequences.Contains("GUAC[Metal:Sodium on C]UG"));
+            }
+        }
+
+        /// <summary>
+        /// Test when two terminal modifications are annotated in the database
+        /// MaxMods 1: expect three results, one unmodified, and two singly modified
+        /// MaxMods 2: expect four results, one unmodified, and two singly modified, and one double modified
+        /// </summary>
+        [Test]
+        public static void TestDatabaseAnnotatedMods_TerminalMods()
+        {
+            var rnaDigestionParams = new RnaDigestionParams();
+            var oneBasedModifications = new Dictionary<int, List<Modification>>()
+            {
+                { 1, new List<Modification>() { TerminalPotassiumAdducts[2]} },
+                { 6, new List<Modification>() { TerminalSodiumAdducts[2]} }
+            };
+            var rna = new RNA("GUACUG", oneBasedPossibleLocalizedModifications: oneBasedModifications);
+
+            // Run the digestion with MaxMods of 1 and then 2; no fixed or variable mods are supplied
+            for (int i = 1; i < 3; i++)
+            {
+                rnaDigestionParams.MaxMods = i;
+                var fixedMods = new List<Modification> { };
+                var variableMods = new List<Modification> { };
+                var precursors = rna.Digest(rnaDigestionParams, fixedMods, variableMods)
+                    .ToList();
+
+                // expect 2 + MaxMods results: one unmodified, two singly modified, plus one doubly modified when MaxMods is 2
+                var fullSequences = precursors.Select(p => p.FullSequence).ToList();
+                Assert.That(precursors.Count, Is.EqualTo(2 + i));
+                Assert.That(precursors.Any(p => p.NumFixedMods == 0));
+                Assert.That(precursors.Any(p => p.NumVariableMods == 1));
+                Assert.That(fullSequences.Contains("GUACUG"));
+                Assert.That(fullSequences.Contains("[Metal:Potassium on G]GUACUG"));
+                Assert.That(fullSequences.Contains("GUACUG[Metal:Sodium on G]"));
+
+                if (rnaDigestionParams.MaxMods != 2) continue;
+                Assert.That(precursors.Any(p => p.NumVariableMods == 2));
+                Assert.That(fullSequences.Contains("[Metal:Potassium on G]GUACUG[Metal:Sodium on G]"));
+            }
+        }
+
+        /// <summary>
+        /// Test when two terminal modifications are annotated in the database and one database mod on first residue
+        /// MaxMods 1: expect four results, one unmodified, and three singly modified
+        /// MaxMods 2: expect seven results, one unmodified, and three singly modified, and three double modified
+        /// MaxMods 3: expect eight results, one unmodified, and three singly modified, and three double modified, and one triply modified
+        /// </summary>
+        [Test]
+        public static void TestDatabaseAnnotatedMods_TerminalMods_WithFirstResidueDatabaseMod()
+        {
+            var rnaDigestionParams = new RnaDigestionParams();
+            var oneBasedModifications = new Dictionary<int, List<Modification>>()
+            {
+                { 1, new List<Modification>() { TerminalPotassiumAdducts[2], PotassiumAdducts[2] } },
+                { 6, new List<Modification>() { TerminalSodiumAdducts[2]} }
+            };
+            var rna = new RNA("GUACUG", oneBasedPossibleLocalizedModifications: oneBasedModifications);
+
+            // Run the digestion with MaxMods of 1, 2 and 3; all candidate mods come from the database annotations
+            for (int i = 1; i < 4; i++)
+            {
+                rnaDigestionParams.MaxMods = i;
+                var fixedMods = new List<Modification> { };
+                var variableMods = new List<Modification> { };
+
+                var precursors = rna.Digest(rnaDigestionParams, fixedMods, variableMods)
+                    .ToList();
+
+                var fullSequences = precursors.Select(p => p.FullSequence).ToList();
+                Assert.That(precursors.All(p => p.NumFixedMods == 0));
+
+                switch (rnaDigestionParams.MaxMods)
+                {
+                    case 1:
+                        Assert.That(precursors.Count, Is.EqualTo(4));
+                        Assert.That(precursors.Skip(1).All(p => p.NumVariableMods == 1));
+                        break;
+                    case 2:
+                        Assert.That(precursors.Count, Is.EqualTo(7));
+                        Assert.That(precursors.Skip(1).All(p => p.NumVariableMods >= 1));
+                        break;
+
+                    case 3:
+                        Assert.That(precursors.Count, Is.EqualTo(8));
+                        Assert.That(precursors.Skip(1).All(p => p.NumVariableMods >= 1));
+                        break;
+                }
+
+                if (rnaDigestionParams.MaxMods >= 1)
+                {
+                    // independent ifs (not else-if) so the higher MaxMods checks below are actually reached
+                    Assert.That(fullSequences.Contains("GUACUG"));
+                    Assert.That(fullSequences.Contains("[Metal:Potassium on G]GUACUG"));
+                    Assert.That(fullSequences.Contains("G[Metal:Potassium on G]UACUG"));
+                    Assert.That(fullSequences.Contains("GUACUG[Metal:Sodium on G]"));
+                }
+                if (rnaDigestionParams.MaxMods >= 2)
+                {
+                    Assert.That(fullSequences.Contains("G[Metal:Potassium on G]UACUG[Metal:Sodium on G]"));
+                    Assert.That(fullSequences.Contains("[Metal:Potassium on G]GUACUG[Metal:Sodium on G]"));
+                    Assert.That(fullSequences.Contains("[Metal:Potassium on G]G[Metal:Potassium on G]UACUG"));
+                }
+                if (rnaDigestionParams.MaxMods >= 3)
+                {
+                    Assert.That(fullSequences.Contains("[Metal:Potassium on G]G[Metal:Potassium on G]UACUG[Metal:Sodium on G]"));
+                }
+            }
+        }
+
+        /// <summary>
+        /// Test when two terminal modifications are annotated in the database and one variable mod (Potassium on G) is supplied
+        /// MaxMods 1: expect five results, one unmodified, and four singly modified
+        /// MaxMods 2: expect eleven results, one unmodified, and four singly modified, and six double modified
+        /// MaxMods 3: expect fifteen results, one unmodified, and four singly modified, and six double modified, and four triply modified
+        /// </summary>
+        [Test]
+        public static void TestDatabaseAnnotatedMods_TerminalMods_WithFirstResidueVariableMod()
+        {
+            var rnaDigestionParams = new RnaDigestionParams();
+            var oneBasedModifications = new Dictionary<int, List<Modification>>()
+            {
+                { 1, new List<Modification>() { TerminalPotassiumAdducts[2] } },
+                { 6, new List<Modification>() { TerminalSodiumAdducts[2]} }
+            };
+            var rna = new RNA("GUACUG", oneBasedPossibleLocalizedModifications: oneBasedModifications);
+
+            // Run the digestion with MaxMods of 1, 2 and 3; database terminal mods plus one variable mod on G
+            for (int i = 1; i < 4; i++)
+            {
+                rnaDigestionParams.MaxMods = i;
+                var fixedMods = new List<Modification> { };
+                var variableMods = new List<Modification> { PotassiumAdducts[2] };
+
+                var precursors = rna.Digest(rnaDigestionParams, fixedMods, variableMods)
+                    .ToList();
+
+                var fullSequences = precursors.Select(p => p.FullSequence).ToList();
+                Assert.That(precursors.All(p => p.NumFixedMods == 0));
+
+                switch (rnaDigestionParams.MaxMods)
+                {
+                    case 1:
+                        Assert.That(precursors.Count, Is.EqualTo(5));
+                        Assert.That(precursors.Skip(1).All(p => p.NumVariableMods == 1));
+                        break;
+                    case 2:
+                        Assert.That(precursors.Count, Is.EqualTo(11));
+                        Assert.That(precursors.Skip(1).All(p => p.NumVariableMods >= 1));
+                        break;
+
+                    case 3:
+                        Assert.That(precursors.Count, Is.EqualTo(15));
+                        Assert.That(precursors.Skip(1).All(p => p.NumVariableMods >= 1));
+                        break;
+                }
+
+                if (rnaDigestionParams.MaxMods >= 1)
+                {
+                    // independent ifs (not else-if) so the higher MaxMods checks below are actually reached
+                    Assert.That(fullSequences.Contains("GUACUG"));
+                    Assert.That(fullSequences.Contains("GUACUG[Metal:Potassium on G]"));
+                    Assert.That(fullSequences.Contains("G[Metal:Potassium on G]UACUG"));
+                    Assert.That(fullSequences.Contains("GUACUG[Metal:Sodium on G]"));
+                    Assert.That(fullSequences.Contains("[Metal:Potassium on G]GUACUG"));
+                }
+                if (rnaDigestionParams.MaxMods >= 2)
+                {
+                    Assert.That(fullSequences.Contains("G[Metal:Potassium on G]UACUG[Metal:Potassium on G]"));
+                    Assert.That(fullSequences.Contains("G[Metal:Potassium on G]UACUG[Metal:Sodium on G]"));
+                    Assert.That(fullSequences.Contains("GUACUG[Metal:Potassium on G][Metal:Sodium on G]"));
+                    Assert.That(fullSequences.Contains("[Metal:Potassium on G]GUACUG[Metal:Sodium on G]"));
+                    Assert.That(fullSequences.Contains("[Metal:Potassium on G]G[Metal:Potassium on G]UACUG"));
+                    Assert.That(fullSequences.Contains("[Metal:Potassium on G]GUACUG[Metal:Potassium on G]"));
+                }
+                if (rnaDigestionParams.MaxMods >= 3)
+                {
+                    Assert.That(fullSequences.Contains("[Metal:Potassium on G]G[Metal:Potassium on G]UACUG[Metal:Sodium on G]"));
+                    Assert.That(fullSequences.Contains("[Metal:Potassium on G]G[Metal:Potassium on G]UACUG[Metal:Potassium on G]"));
+
+                    Assert.That(fullSequences.Contains("G[Metal:Potassium on G]UACUG[Metal:Potassium on G][Metal:Sodium on G]"));
+                    Assert.That(fullSequences.Contains("[Metal:Potassium on G]GUACUG[Metal:Potassium on G][Metal:Sodium on G]"));
+                }
+            }
+        }
+
+
+
+
         #endregion
     }
 }
diff --git a/mzLib/Test/Transcriptomics/TestNucleicAcid.cs b/mzLib/Test/Transcriptomics/TestNucleicAcid.cs
index a0c5619c9..47e98d708 100644
--- a/mzLib/Test/Transcriptomics/TestNucleicAcid.cs
+++ b/mzLib/Test/Transcriptomics/TestNucleicAcid.cs
@@ -93,6 +93,9 @@ public void TestConstructorsAndEquality(string sequence, double monoMass)
             CollectionAssert.AreEqual(rna.NucleicAcidArray.Select(p => p.Letter), sequence);
             Assert.That(rna.FivePrimeTerminus.Equals(NucleicAcid.DefaultFivePrimeTerminus));
             Assert.That(rna.ThreePrimeTerminus.Equals(NucleicAcid.DefaultThreePrimeTerminus));
+            rna.ThreePrimeTerminus = rna.ThreePrimeTerminus;
+            Assert.That(rna.ThreePrimeTerminus.Equals(NucleicAcid.DefaultThreePrimeTerminus));
+
             List<Nucleotide> nucList = new();
             foreach (var nucleotide in sequence)
             {
@@ -145,11 +148,11 @@ public void TestElectroSpraySeries(string sequence, int[] charges, double[] mzs)
         {
             RNA rna = new(sequence);
 
-            int i = 0;
-            foreach (var ion in rna.GetElectrospraySeries(charges.First(), charges.Last()))
+            var esiSeries = rna.GetElectrospraySeries(charges.First(), charges.Last()).ToArray();
+            for (int j = 0; j < mzs.Length; j++)
             {
-                Assert.That(ion, Is.EqualTo(mzs[i]).Within(0.001));
-                i++;
+                var ion = esiSeries[j];
+                Assert.That(ion, Is.EqualTo(mzs[j]).Within(0.01));
             }
         }
 
@@ -158,13 +161,13 @@ public void TestElectroSpraySeries(string sequence, int[] charges, double[] mzs)
         public void TestReplaceTerminusWithElectroSpraySeries(string sequence, int[] charges, double[] mzs)
         {
             RNA rna = new("GUACUG");
-            rna.FivePrimeTerminus = new ChemicalFormula();
+            rna.FivePrimeTerminus = ChemicalFormula.ParseFormula("H1");
 
-            int i = 0;
-            foreach (var ion in rna.GetElectrospraySeries(charges.First(), charges.Last()))
+            var esiSeries = rna.GetElectrospraySeries(charges.Last(), charges.First()).ToArray();
+            for (int j = 0; j < mzs.Length; j++)
             {
-                Assert.That(ion, Is.EqualTo(mzs[i]).Within(0.001));
-                i++;
+                var ion = esiSeries[j];
+                Assert.That(ion, Is.EqualTo(mzs[j]).Within(0.01));
             }
         }
     }
diff --git a/mzLib/Transcriptomics/Digestion/NucleolyticOligo.cs b/mzLib/Transcriptomics/Digestion/NucleolyticOligo.cs
index a2ad1741b..d2d41cba7 100644
--- a/mzLib/Transcriptomics/Digestion/NucleolyticOligo.cs
+++ b/mzLib/Transcriptomics/Digestion/NucleolyticOligo.cs
@@ -46,7 +46,7 @@ public override string ToString()
         /// <remarks>
         /// Code heavily borrowed from ProteolyticPeptide.GetModifiedPeptides
         /// </remarks>
-        internal IEnumerable<OligoWithSetMods> GenerateModifiedOligos(IEnumerable<Modification> allKnownFixedMods,
+        internal IEnumerable<OligoWithSetMods> GenerateModifiedOligos(List<Modification> allKnownFixedMods,
             RnaDigestionParams digestionParams, List<Modification> variableModifications)
         {
             int oligoLength = OneBasedEndResidue - OneBasedStartResidue + 1;
@@ -63,7 +63,7 @@ internal IEnumerable<OligoWithSetMods> GenerateModifiedOligos(IEnumerable<Modifi
             // collect all possible variable mods, skipping if there is a database annotated modification
             foreach (Modification variableModification in variableModifications)
             {
-                // Check if can be a n-term mod
+                // Check if can be a 5'-term mod
                 if (CanBeFivePrime(variableModification, oligoLength) && !ModificationLocalization.UniprotModExists(NucleicAcid, 1, variableModification))
                 {
                     fivePrimeVariableMods.Add(variableModification);
@@ -86,7 +86,7 @@ internal IEnumerable<OligoWithSetMods> GenerateModifiedOligos(IEnumerable<Modifi
                         }
                     }
                 }
-                // Check if can be a c-term mod
+                // Check if can be a 3'-term mod
                 if (CanBeThreePrime(variableModification, oligoLength) && !ModificationLocalization.UniprotModExists(NucleicAcid, oligoLength, variableModification))
                 {
                     threePrimeVariableMods.Add(variableModification);
@@ -107,7 +107,7 @@ internal IEnumerable<OligoWithSetMods> GenerateModifiedOligos(IEnumerable<Modifi
                 {
                     if (modWithMass is Modification variableModification)
                     {
-                        // Check if can be a n-term mod
+                        // Check if can be a 5'-term mod
                         if (locInPeptide == 1 && CanBeFivePrime(variableModification, oligoLength) && !NucleicAcid.IsDecoy)
                         {
                             fivePrimeVariableMods.Add(variableModification);
@@ -130,7 +130,7 @@ internal IEnumerable<OligoWithSetMods> GenerateModifiedOligos(IEnumerable<Modifi
                             }
                         }
 
-                        // Check if can be a c-term mod
+                        // Check if can be a 3'-term mod
                         if (locInPeptide == oligoLength && CanBeThreePrime(variableModification, oligoLength) && !NucleicAcid.IsDecoy)
                         {
                             threePrimeVariableMods.Add(variableModification);
@@ -139,24 +139,24 @@ internal IEnumerable<OligoWithSetMods> GenerateModifiedOligos(IEnumerable<Modifi
                 }
             }
 
-            int variable_modification_isoforms = 0;
+            int variableModificationIsoforms = 0;
 
             // Add the mods to the oligo by return numerous OligoWithSetMods
-            foreach (Dictionary<int, Modification> kvp in GetVariableModificationPatterns(twoBasedPossibleVariableAndLocalizeableModifications, maxModsForOligo, oligoLength))
+            foreach (Dictionary<int, Modification> variableModPattern in GetVariableModificationPatterns(twoBasedPossibleVariableAndLocalizeableModifications, maxModsForOligo, oligoLength))
             {
                 int numFixedMods = 0;
-                foreach (var ok in GetFixedModsOneIsNorFivePrimeTerminus(oligoLength, allKnownFixedMods))
+                foreach (var fixedModPattern in GetFixedModsOneIsNorFivePrimeTerminus(oligoLength, allKnownFixedMods))
                 {
-                    if (!kvp.ContainsKey(ok.Key))
+                    if (!variableModPattern.ContainsKey(fixedModPattern.Key))
                     {
                         numFixedMods++;
-                        kvp.Add(ok.Key, ok.Value);
+                        variableModPattern.Add(fixedModPattern.Key, fixedModPattern.Value);
                     }
                 }
                 yield return new OligoWithSetMods(NucleicAcid, digestionParams, OneBasedStartResidue, OneBasedEndResidue, MissedCleavages,
-                    CleavageSpecificityForFdrCategory, kvp, numFixedMods, _fivePrimeTerminus, _threePrimeTerminus);
-                variable_modification_isoforms++;
-                if (variable_modification_isoforms == maximumVariableModificationIsoforms)
+                    CleavageSpecificityForFdrCategory, variableModPattern, numFixedMods, _fivePrimeTerminus, _threePrimeTerminus);
+                variableModificationIsoforms++;
+                if (variableModificationIsoforms == maximumVariableModificationIsoforms)
                 {
                     yield break;
                 }
diff --git a/mzLib/Transcriptomics/Digestion/OligoWithSetMods.cs b/mzLib/Transcriptomics/Digestion/OligoWithSetMods.cs
index f432b5fed..5fab7d6bd 100644
--- a/mzLib/Transcriptomics/Digestion/OligoWithSetMods.cs
+++ b/mzLib/Transcriptomics/Digestion/OligoWithSetMods.cs
@@ -342,7 +342,7 @@ private Dictionary<int, Modification> GetModsAfterDeserialization(Dictionary<str
                         catch (Exception e)
                         {
                             throw new MzLibUtil.MzLibException(
-                                "Error while trying to parse string into peptide: " + e.Message);
+                                "Error while trying to parse string into peptide: " + e.Message, e);
                         }
 
                         if (!idToMod.TryGetValue(modId, out Modification mod))
@@ -351,7 +351,7 @@ private Dictionary<int, Modification> GetModsAfterDeserialization(Dictionary<str
                                 "Could not find modification while reading string: " + FullSequence);
                         }
 
-                        if (mod.LocationRestriction.Contains("C-terminal.") && r == FullSequence.Length - 1)
+                        if (mod.LocationRestriction.Contains("3'-terminal.") && r == FullSequence.Length - 1)
                         {
                             currentModificationLocation = BaseSequence.Length + 2;
                         }
diff --git a/mzLib/Transcriptomics/NucleicAcid.cs b/mzLib/Transcriptomics/NucleicAcid.cs
index c99934f75..110fbeb2f 100644
--- a/mzLib/Transcriptomics/NucleicAcid.cs
+++ b/mzLib/Transcriptomics/NucleicAcid.cs
@@ -3,6 +3,7 @@
 using Omics.Modifications;
 using Omics;
 using System.Text;
+using MzLibUtil;
 using Transcriptomics.Digestion;
 
 namespace Transcriptomics
@@ -186,8 +187,8 @@ public IEnumerable<IBioPolymerWithSetMods> Digest(IDigestionParams digestionPara
             bool topDownTruncationSearch = false)
         {
             if (digestionParameters is not RnaDigestionParams digestionParams)
-                throw new ArgumentException(
-                    "DigestionParameters must be of type DigestionParams for protein digestion");
+                throw new MzLibException(
+                    "DigestionParameters must be of type DigestionParams for protein digestion", new ArgumentException());
             allKnownFixedMods ??= new();
             variableModifications ??= new();
 
@@ -220,10 +221,11 @@ public IEnumerable<OligoWithSetMods> Digest(RnaDigestionParams digestionParamete
 
         public IEnumerable<double> GetElectrospraySeries(int minCharge, int maxCharge)
         {
-            for (int i = minCharge; i < maxCharge; i++)
-            {
-                yield return this.ToMz(i);
-            }
+            if (minCharge > maxCharge)
+                (minCharge, maxCharge) = (maxCharge, minCharge);
+            
+            for (int i = maxCharge; i > minCharge - 1; i--)
+                yield return this.ToMz(i); 
         }
 
         #endregion
diff --git a/mzLib/mzLib.sln.DotSettings b/mzLib/mzLib.sln.DotSettings
index 6522afcd5..6c67babc8 100644
--- a/mzLib/mzLib.sln.DotSettings
+++ b/mzLib/mzLib.sln.DotSettings
@@ -1,4 +1,5 @@
 ﻿ <wpf:ResourceDictionary xml:space="preserve" xmlns:x="http://schemas.microsoft.com/winfx/2006/xaml" xmlns:s="clr-namespace:System;assembly=mscorlib" xmlns:ss="urn:shemas-jetbrains-com:settings-storage-xaml" xmlns:wpf="http://schemas.microsoft.com/winfx/2006/xaml/presentation">
+	<s:Boolean x:Key="/Default/UserDictionary/Words/=Adducts/@EntryIndexedValue">True</s:Boolean>
 	<s:Boolean x:Key="/Default/UserDictionary/Words/=decon/@EntryIndexedValue">True</s:Boolean>
 	<s:Boolean x:Key="/Default/UserDictionary/Words/=Deconv/@EntryIndexedValue">True</s:Boolean>
 	<s:Boolean x:Key="/Default/UserDictionary/Words/=Deconvolute/@EntryIndexedValue">True</s:Boolean>

From b295b63324e02eaffb2cb3270715001194f750ff Mon Sep 17 00:00:00 2001
From: nbollis <nbollis@comcast.net>
Date: Tue, 1 Oct 2024 19:01:44 -0500
Subject: [PATCH 16/17] Add test data files and methods for RNA sequence
 handling

Added new test data files (`20mer1.fasta`, `20mer1.fasta.gz`, `20mer1.xml`, `20mer1.xml.gz`) to the `Transcriptomics\TestData` directory in the `Test.csproj` file, ensuring they are copied to the output directory. Introduced `TestDbReadingDifferentExtensions` in `TestDbLoader.cs` to verify RNA database reading from various formats. Added `TestDigestionMaxIsoforms` in `TestDigestion.cs` to test RNA sequence digestion with max isoforms. Updated `WriteNucleicAcidXmlDatabase` in `ProteinDbWriter.cs` with remarks for future implementation. Added a TODO in `RnaDecoyGenerator.cs` regarding palindromic sequences' impact on fragment ions. Included new RNA sequence data in test files for validation.
---
 mzLib/Test/Test.csproj                        |  12 ++++++++++
 .../Transcriptomics/TestData/20mer1.fasta     |   2 ++
 .../Transcriptomics/TestData/20mer1.fasta.gz  | Bin 0 -> 135 bytes
 .../Test/Transcriptomics/TestData/20mer1.xml  |  17 ++++++++++++++
 .../Transcriptomics/TestData/20mer1.xml.gz    | Bin 0 -> 254 bytes
 mzLib/Test/Transcriptomics/TestDbLoader.cs    |  22 ++++++++++++++++++
 mzLib/Test/Transcriptomics/TestDigestion.cs   |  12 +++++++++-
 .../ProteinDbWriter.cs                        |   4 ++++
 .../Transcriptomics/RnaDecoyGenerator.cs      |   1 +
 9 files changed, 69 insertions(+), 1 deletion(-)
 create mode 100644 mzLib/Test/Transcriptomics/TestData/20mer1.fasta
 create mode 100644 mzLib/Test/Transcriptomics/TestData/20mer1.fasta.gz
 create mode 100644 mzLib/Test/Transcriptomics/TestData/20mer1.xml
 create mode 100644 mzLib/Test/Transcriptomics/TestData/20mer1.xml.gz

diff --git a/mzLib/Test/Test.csproj b/mzLib/Test/Test.csproj
index 76ab4ac9b..fa53ca295 100644
--- a/mzLib/Test/Test.csproj
+++ b/mzLib/Test/Test.csproj
@@ -495,6 +495,18 @@
     <None Update="FileReadingTests\SearchResults\VariantCrossTest.psmtsv">
       <CopyToOutputDirectory>Always</CopyToOutputDirectory>
     </None>
+    <None Update="Transcriptomics\TestData\20mer1.fasta">
+      <CopyToOutputDirectory>Always</CopyToOutputDirectory>
+    </None>
+    <None Update="Transcriptomics\TestData\20mer1.fasta.gz">
+      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+    </None>
+    <None Update="Transcriptomics\TestData\20mer1.xml">
+      <CopyToOutputDirectory>Always</CopyToOutputDirectory>
+    </None>
+    <None Update="Transcriptomics\TestData\20mer1.xml.gz">
+      <CopyToOutputDirectory>Always</CopyToOutputDirectory>
+    </None>
     <None Update="Transcriptomics\TestData\ModomicsUnmodifiedTrimmed.fasta">
       <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
     </None>
diff --git a/mzLib/Test/Transcriptomics/TestData/20mer1.fasta b/mzLib/Test/Transcriptomics/TestData/20mer1.fasta
new file mode 100644
index 000000000..c222589c1
--- /dev/null
+++ b/mzLib/Test/Transcriptomics/TestData/20mer1.fasta
@@ -0,0 +1,2 @@
+>id:2|Name:20mer1|SOterm:20mer1|Type:tRNA|Subtype:Ala|Feature:VGC|Cellular_Localization:freezer|Species:standard
+GUACUGCCUCUAGUGAAGCA
\ No newline at end of file
diff --git a/mzLib/Test/Transcriptomics/TestData/20mer1.fasta.gz b/mzLib/Test/Transcriptomics/TestData/20mer1.fasta.gz
new file mode 100644
index 0000000000000000000000000000000000000000..2fe54f9ab05d0cf835bd1fc6f7741d53388df7c7
GIT binary patch
literal 135
zcmV;20C@i&iwFRRiTq{&12xCJ3IZ_<0N`ElUG&BbLI{{ePO#Qh;x$5{_d~87Jo513
z;J1CJoS^ewMuqM$svSLCtJ?VA{j(_4TaO;vtNwyT{H#)&DLj~*w3JrFo|k!$k~gV&
pp5Se&o0^A3hpa2CRi-4C_B)P9k%=Nj<j9Cj_yrC~a1e$7004-GLL&eG

literal 0
HcmV?d00001

diff --git a/mzLib/Test/Transcriptomics/TestData/20mer1.xml b/mzLib/Test/Transcriptomics/TestData/20mer1.xml
new file mode 100644
index 000000000..6f17d6f3d
--- /dev/null
+++ b/mzLib/Test/Transcriptomics/TestData/20mer1.xml
@@ -0,0 +1,17 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<mzLibProteinDb>
+  <entry>
+    <accession>20mer1</accession>
+    <name>20mer1</name>
+    <protein>
+      <recommendedName>
+        <fullName>20mer1</fullName>
+      </recommendedName>
+    </protein>
+    <gene />
+    <organism>
+      <name type="scientific">standard</name>
+    </organism>
+    <sequence length="20">GUACUGCCUCUAGUGAAGCA</sequence>
+  </entry>
+</mzLibProteinDb>
\ No newline at end of file
diff --git a/mzLib/Test/Transcriptomics/TestData/20mer1.xml.gz b/mzLib/Test/Transcriptomics/TestData/20mer1.xml.gz
new file mode 100644
index 0000000000000000000000000000000000000000..19dac16bf1c4a229db2685f1897563244d556383
GIT binary patch
literal 254
zcmV<a00I9WiwFQ|i2P;%18q<{PQ)+}?UA^H<sO{fio(gtGEy3}qR9aqXA&&M<HdFm
z%hf125?8>R&4(a5zt6n!yzkF1et5S%c>^De!<_E!GfDupYYcWd9b$X;Ko7<34Ywap
zrhfJ=g2Bf+F@$iy=+EOb0i`vBOG>fNEchK~D>GS8Sy<E&Ov34!JHbSF(9T-G2AbpQ
z8>M#W`{VDRg;=K9|Kpr(_wWu72%C}fokA1rnsfOgiRTmKG-#8qHLcOK2vK39e6ziQ
z{h8+hUWZgAkRGs$FFD<3v?wbft5V8JR-&v*AxbGYo2}Pc+1x8<o7U^%2k`z)oUs7_
E0Dtd&#{d8T

literal 0
HcmV?d00001

diff --git a/mzLib/Test/Transcriptomics/TestDbLoader.cs b/mzLib/Test/Transcriptomics/TestDbLoader.cs
index 6e1725b8f..e1ef6af90 100644
--- a/mzLib/Test/Transcriptomics/TestDbLoader.cs
+++ b/mzLib/Test/Transcriptomics/TestDbLoader.cs
@@ -145,5 +145,27 @@ public static void TestTranscribe(string input, string expected, bool isCodingSt
         {
             Assert.That(input.Transcribe(isCodingStrand), Is.EqualTo(expected));
         }
+
+        /// <summary>Reads the same 20mer entry from .fasta, .fasta.gz, .xml and .xml.gz files and expects exactly one RNA with the known sequence.</summary>
+        [Test]
+        [TestCase("20mer1.fasta")]
+        [TestCase("20mer1.fasta.gz")]
+        [TestCase("20mer1.xml")]
+        [TestCase("20mer1.xml.gz")]
+        public static void TestDbReadingDifferentExtensions(string databaseFileName)
+        {
+            var dbPath = Path.Combine(TestContext.CurrentContext.TestDirectory, "Transcriptomics", "TestData", databaseFileName);
+
+            // Choose the loader from the file name only: the absolute test directory could itself contain "fasta"
+            List<RNA> rna;
+            if (databaseFileName.Contains("fasta"))
+                rna = RnaDbLoader.LoadRnaFasta(dbPath, true, DecoyType.None, false, out _);
+            else
+                rna = RnaDbLoader.LoadRnaXML(dbPath, true, DecoyType.None, false,
+                    new List<Modification>(), new List<string>(), out _);
+
+            Assert.That(rna.Count, Is.EqualTo(1));
+            Assert.That(rna.First().BaseSequence, Is.EqualTo("GUACUGCCUCUAGUGAAGCA"));
+        }
     }
 }
diff --git a/mzLib/Test/Transcriptomics/TestDigestion.cs b/mzLib/Test/Transcriptomics/TestDigestion.cs
index a46017c0e..acfcacdef 100644
--- a/mzLib/Test/Transcriptomics/TestDigestion.cs
+++ b/mzLib/Test/Transcriptomics/TestDigestion.cs
@@ -1168,8 +1168,18 @@ public static void TestDatabaseAnnotatedMods_TerminalMods_WithFirstResidueVariab
             }
         }
 
+        [Test]
+        public static void TestDigestionMaxIsoforms()
+        {
+            var rna = new RNA("GUACUAGACUACAUGGUACAUCA");
+            var rnaDigestionParams = new RnaDigestionParams();
+            var variableMods = SodiumAdducts.Concat(PotassiumAdducts)
+                .Concat(TerminalPotassiumAdducts).Concat(TerminalSodiumAdducts).ToList();
 
-
+            var digestionProducts = rna.Digest(rnaDigestionParams, new List<Modification>(), variableMods)
+                .ToList();
+            Assert.That(digestionProducts.Count, Is.EqualTo(rnaDigestionParams.MaxModificationIsoforms));
+        }
 
         #endregion
     }
diff --git a/mzLib/UsefulProteomicsDatabases/ProteinDbWriter.cs b/mzLib/UsefulProteomicsDatabases/ProteinDbWriter.cs
index d47912f4d..dadba9e11 100644
--- a/mzLib/UsefulProteomicsDatabases/ProteinDbWriter.cs
+++ b/mzLib/UsefulProteomicsDatabases/ProteinDbWriter.cs
@@ -37,6 +37,10 @@ public static Dictionary<string, int> WriteXmlDatabase(
         /// <param name="nucleicAcidList">A list of nucleic acid sequences to be written to the database.</param>
         /// <param name="outputFileName">The name of the output XML file.</param>
         /// <returns>A dictionary of new modification residue entries.</returns>
+        /// <remarks>
+        /// Several chunks of code are commented out. These are blocks that are intended to be implemented in the future, but
+        /// are not necessary for the bare bones implementation of Transcriptomics
+        /// </remarks>
         private static Dictionary<string, int> WriteNucleicAcidXmlDatabase(
             Dictionary<string, HashSet<Tuple<int, Modification>>> additionalModsToAddToProteins,
             List<NucleicAcid> nucleicAcidList, string outputFileName)
diff --git a/mzLib/UsefulProteomicsDatabases/Transcriptomics/RnaDecoyGenerator.cs b/mzLib/UsefulProteomicsDatabases/Transcriptomics/RnaDecoyGenerator.cs
index 6bd25e31f..b9cc20e1d 100644
--- a/mzLib/UsefulProteomicsDatabases/Transcriptomics/RnaDecoyGenerator.cs
+++ b/mzLib/UsefulProteomicsDatabases/Transcriptomics/RnaDecoyGenerator.cs
@@ -19,6 +19,7 @@ namespace UsefulProteomicsDatabases.Transcriptomics
     /// The <c>GenerateDecoys</c> method serves as the main entry point, delegating to specific decoy generation methods based on the specified <see cref="DecoyType"/>.
     /// TODO: Implement Shuffle and Slide Decoys
     /// TODO: Consider passing digestion motif as optional parameter to leave digestion sites intact. Currently leaving the 3' intact as it is the predominant cleavage motif.
+    /// TODO: Consider palindromic sequences and the effect they have on fragment ions (d/z are identical, c/y are identical). This will be particularly important for slide decoys
     /// </remarks>
     public static class RnaDecoyGenerator
     {

From 04f7e67aadf082b8949ee7a10c50e3559988e51e Mon Sep 17 00:00:00 2001
From: nbollis <nbollis@comcast.net>
Date: Tue, 8 Oct 2024 12:56:27 -0500
Subject: [PATCH 17/17] Added test coverage to the localize method within
 BioPolymerWithSetMods

---
 mzLib/Omics/IBioPolymerWithSetMods.cs         | 11 ++-
 .../PeptideWithSetModifications.cs            |  8 +-
 .../Transcriptomics/TestOligoWithSetMods.cs   | 79 +++++++++++++++++++
 .../Digestion/OligoWithSetMods.cs             | 16 +++-
 mzLib/Transcriptomics/RNA.cs                  |  1 +
 5 files changed, 106 insertions(+), 9 deletions(-)
 create mode 100644 mzLib/Test/Transcriptomics/TestOligoWithSetMods.cs

diff --git a/mzLib/Omics/IBioPolymerWithSetMods.cs b/mzLib/Omics/IBioPolymerWithSetMods.cs
index 1c3ade66a..0b9926a01 100644
--- a/mzLib/Omics/IBioPolymerWithSetMods.cs
+++ b/mzLib/Omics/IBioPolymerWithSetMods.cs
@@ -50,7 +50,16 @@ public void Fragment(DissociationType dissociationType, FragmentationTerminus fr
         public void FragmentInternally(DissociationType dissociationType, int minLengthOfFragments,
             List<Product> products);
 
-        public IBioPolymerWithSetMods Localize(int j, double massToLocalize);
+        /// <summary>
+        /// Outputs a duplicate IBioPolymerWithSetMods with a localized mass shift, replacing a modification when present
+        /// </summary>
+        /// <remarks>
+        /// Used to localize an unknown mass shift in the MetaMorpheus Localization Engine
+        /// </remarks>
+        /// <param name="indexOfMass">Two less than the key in the AllModsOneIsNterminus dictionary where the mass is placed (implementations add 2 before indexing the dictionary)</param>
+        /// <param name="massToLocalize">The mass to add to the BioPolymer</param>
+        /// <returns>A new IBioPolymerWithSetMods carrying the localized mass shift</returns>
+        public IBioPolymerWithSetMods Localize(int indexOfMass, double massToLocalize);
 
         public static string GetBaseSequenceFromFullSequence(string fullSequence)
         {
diff --git a/mzLib/Proteomics/ProteolyticDigestion/PeptideWithSetModifications.cs b/mzLib/Proteomics/ProteolyticDigestion/PeptideWithSetModifications.cs
index 1b7d32d61..8eb6e6bdf 100644
--- a/mzLib/Proteomics/ProteolyticDigestion/PeptideWithSetModifications.cs
+++ b/mzLib/Proteomics/ProteolyticDigestion/PeptideWithSetModifications.cs
@@ -613,17 +613,17 @@ public void FragmentInternally(DissociationType dissociationType, int minLengthO
             }
         }
 
-        public IBioPolymerWithSetMods Localize(int j, double massToLocalize)
+        public IBioPolymerWithSetMods Localize(int indexOfMass, double massToLocalize)
         {
             var dictWithLocalizedMass = new Dictionary<int, Modification>(AllModsOneIsNterminus);
             double massOfExistingMod = 0;
-            if (dictWithLocalizedMass.TryGetValue(j + 2, out Modification modToReplace))
+            if (dictWithLocalizedMass.TryGetValue(indexOfMass + 2, out Modification modToReplace))
             {
                 massOfExistingMod = (double)modToReplace.MonoisotopicMass;
-                dictWithLocalizedMass.Remove(j + 2);
+                dictWithLocalizedMass.Remove(indexOfMass + 2);
             }
 
-            dictWithLocalizedMass.Add(j + 2, new Modification(_locationRestriction: "Anywhere.", _monoisotopicMass: massToLocalize + massOfExistingMod));
+            dictWithLocalizedMass.Add(indexOfMass + 2, new Modification(_locationRestriction: "Anywhere.", _monoisotopicMass: massToLocalize + massOfExistingMod));
 
             var peptideWithLocalizedMass = new PeptideWithSetModifications(Protein, _digestionParams, OneBasedStartResidueInProtein, OneBasedEndResidueInProtein,
                 CleavageSpecificityForFdrCategory, PeptideDescription, MissedCleavages, dictWithLocalizedMass, NumFixedMods);
diff --git a/mzLib/Test/Transcriptomics/TestOligoWithSetMods.cs b/mzLib/Test/Transcriptomics/TestOligoWithSetMods.cs
new file mode 100644
index 000000000..6255ef2fd
--- /dev/null
+++ b/mzLib/Test/Transcriptomics/TestOligoWithSetMods.cs
@@ -0,0 +1,79 @@
+﻿using System;
+using System.Collections.Generic;
+using System.Diagnostics.CodeAnalysis;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+using NUnit.Framework;
+using Omics.Modifications;
+using Transcriptomics.Digestion;
+using Transcriptomics;
+
+namespace Test.Transcriptomics
+{
+    [ExcludeFromCodeCoverage]
+    public static class TestOligoWithSetMods
+    {
+        /// <summary>
+        /// Localizes a mass onto a GUACUG oligo carrying zero or one modification; indexOfMass is the AllModsOneIsNterminus key the mass must end up under.
+        /// </summary>
+        [Test]
+        [TestCase(0, 1, 20.45)]
+        [TestCase(1, 1, 20.45)]
+        [TestCase(0, 2, 20.45)]
+        [TestCase(1, 2, 20.45)]
+        [TestCase(0, 5, 28.37)]
+        [TestCase(1, 5, 28.37)]
+        [TestCase(0, 6, 28.37)]
+        [TestCase(1, 6, 28.37)]
+        public static void TestLocalize(int modsOnOligo, int indexOfMass, double massToLocalize)
+        {
+            var oligoWithSetMods = new RNA("GUACUG",
+                    oneBasedPossibleLocalizedModifications: new Dictionary<int, List<Modification>> { { 4, [TestDigestion.PotassiumAdducts[1]] } })
+                .Digest(new RnaDigestionParams(), [], [])
+                .ElementAt(modsOnOligo);
+
+            Assert.That(oligoWithSetMods.AllModsOneIsNterminus.Count, Is.EqualTo(modsOnOligo));
+
+            // Act
+            var localizedOligo = oligoWithSetMods.Localize(indexOfMass - 2, massToLocalize);
+
+            // Assert
+            int expectedModificationCount;
+            double expectedMass;
+            if (modsOnOligo == 1) // if the oligo started with a mod
+            {
+                int indexOfOriginalMod = oligoWithSetMods.AllModsOneIsNterminus.Keys.First();
+                // ensure original modification exists
+                Assert.That(localizedOligo.AllModsOneIsNterminus.ContainsKey(indexOfOriginalMod));
+
+                if (indexOfOriginalMod != indexOfMass) // Additional mass was added to a different location
+                {
+                    expectedModificationCount = modsOnOligo + 1;
+                    expectedMass = massToLocalize;
+                    // ensure original modification is still intact
+                    Assert.That(localizedOligo.AllModsOneIsNterminus[indexOfOriginalMod].MonoisotopicMass,
+                        Is.EqualTo(oligoWithSetMods.OneBasedPossibleLocalizedModifications[indexOfOriginalMod][0].MonoisotopicMass));
+                }
+                else // Additional mass was added to the location of an existing modification
+                {
+                    expectedModificationCount = modsOnOligo;
+                    expectedMass = massToLocalize + TestDigestion.PotassiumAdducts[1].MonoisotopicMass!.Value;
+                    // ensure original modification has been altered
+                    Assert.That(localizedOligo.AllModsOneIsNterminus[indexOfOriginalMod].MonoisotopicMass,
+                        Is.Not.EqualTo(oligoWithSetMods.OneBasedPossibleLocalizedModifications[indexOfOriginalMod][0].MonoisotopicMass));
+                }
+            }
+            else // oligo started with no modifications
+            {
+                expectedModificationCount = modsOnOligo + 1;
+                expectedMass = massToLocalize;
+            }
+
+            // NUnit convention: observed value first, expected value inside the constraint, so failure messages read correctly
+            Assert.That(localizedOligo.AllModsOneIsNterminus.Count, Is.EqualTo(expectedModificationCount));
+            Assert.That(localizedOligo.AllModsOneIsNterminus.ContainsKey(indexOfMass));
+            Assert.That(localizedOligo.AllModsOneIsNterminus[indexOfMass].MonoisotopicMass, Is.EqualTo(expectedMass));
+        }
+    }
+}
diff --git a/mzLib/Transcriptomics/Digestion/OligoWithSetMods.cs b/mzLib/Transcriptomics/Digestion/OligoWithSetMods.cs
index 5fab7d6bd..19902f57e 100644
--- a/mzLib/Transcriptomics/Digestion/OligoWithSetMods.cs
+++ b/mzLib/Transcriptomics/Digestion/OligoWithSetMods.cs
@@ -286,17 +286,25 @@ public IEnumerable<Product> GetNeutralFragments(ProductType type, Nucleotide[]?
             }
         }
 
-        public IBioPolymerWithSetMods Localize(int j, double massToLocalize)
+        /// <summary>
+        /// Outputs a duplicate IBioPolymerWithSetMods with a localized mass shift, replacing a modification when present
+        /// </summary>
+        /// <remarks>
+        /// Used to localize an unknown mass shift in the MetaMorpheus Localization Engine
+        /// </remarks>
+        /// <param name="indexOfMass">Two less than the key in the AllModsOneIsNterminus dictionary where the mass is placed (2 is added before indexing the dictionary)</param>
+        /// <param name="massToLocalize">The mass to add to the BioPolymer</param>
+        public IBioPolymerWithSetMods Localize(int indexOfMass, double massToLocalize)
         {
             var dictWithLocalizedMass = new Dictionary<int, Modification>(AllModsOneIsNterminus);
             double massOfExistingMod = 0;
-            if (dictWithLocalizedMass.TryGetValue(j + 2, out Modification modToReplace))
+            if (dictWithLocalizedMass.TryGetValue(indexOfMass + 2, out Modification modToReplace))
             {
                 massOfExistingMod = (double)modToReplace.MonoisotopicMass;
-                dictWithLocalizedMass.Remove(j + 2);
+                dictWithLocalizedMass.Remove(indexOfMass + 2);
             }
 
-            dictWithLocalizedMass.Add(j + 2, new Modification(_locationRestriction: "Anywhere.", _monoisotopicMass: massToLocalize + massOfExistingMod));
+            dictWithLocalizedMass.Add(indexOfMass + 2, new Modification(_locationRestriction: "Anywhere.", _monoisotopicMass: massToLocalize + massOfExistingMod));
 
             var peptideWithLocalizedMass = new OligoWithSetMods(NucleicAcid, _digestionParams, OneBasedStartResidue, OneBasedEndResidue, MissedCleavages,
                 CleavageSpecificityForFdrCategory, dictWithLocalizedMass, NumFixedMods, FivePrimeTerminus, ThreePrimeTerminus);
diff --git a/mzLib/Transcriptomics/RNA.cs b/mzLib/Transcriptomics/RNA.cs
index 5d5fcb2f6..41e3a64e9 100644
--- a/mzLib/Transcriptomics/RNA.cs
+++ b/mzLib/Transcriptomics/RNA.cs
@@ -31,6 +31,7 @@ public RNA(string sequence, IHasChemicalFormula? fivePrimeTerm = null, IHasChemi
         /// <param name="oneBasedPossibleModifications"></param>
         /// <param name="isContaminant"></param>
         /// <param name="isDecoy"></param>
+        /// <param name="geneNames"></param>
         /// <param name="databaseAdditionalFields"></param>
         public RNA(string sequence, string name, string identifier, string organism, string databaseFilePath,
             IHasChemicalFormula? fivePrimeTerminus = null, IHasChemicalFormula? threePrimeTerminus = null,