Skip to content

Commit

Permalink
Add missing header tags as per the spec (PR pysam-developers#1238)
Browse files Browse the repository at this point in the history
Add omitted tags to KNOWN_HEADER_FIELDS and VALID_HEADER_ORDER, so that
AlignmentHeader.from_dict()/to_dict() will preserve all header field tags
defined in the SAM specification. Addresses pysam-developers#1237.

Exercise TP:linear in some header tests.

Co-authored-by: John Marshall <jmarshall@hey.com>
  • Loading branch information
nh13 and jmarshall authored Oct 20, 2023
1 parent 98a37ad commit cdc0ed1
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 8 deletions.
15 changes: 9 additions & 6 deletions pysam/libcalignmentfile.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -111,27 +111,30 @@ VALID_HEADER_TYPES = {"HD" : Mapping,
VALID_HEADERS = ("HD", "SQ", "RG", "PG", "CO")

# default type conversions within SAM header records
KNOWN_HEADER_FIELDS = {"HD" : {"VN" : str, "SO" : str, "GO" : str},
KNOWN_HEADER_FIELDS = {"HD" : {"VN" : str, "SO" : str, "GO" : str,
"SS" : str,},
"SQ" : {"SN" : str, "LN" : int, "AS" : str,
"M5" : str, "SP" : str, "UR" : str,
"AH" : str,},
"AH" : str, "TP" : str, "DS" : str,
"AN" : str,},
"RG" : {"ID" : str, "CN" : str, "DS" : str,
"DT" : str, "FO" : str, "KS" : str,
"LB" : str, "PG" : str, "PI" : str,
"PL" : str, "PM" : str, "PU" : str,
"SM" : str,},
"SM" : str, "BC" : str,},
"PG" : {"ID" : str, "PN" : str, "CL" : str,
"PP" : str, "DS" : str, "VN" : str,},}

# output order of fields within records. Ensure that CL is at
# the end as parsing a CL will ignore any subsequent fields.
VALID_HEADER_ORDER = {"HD" : ("VN", "SO", "GO"),
VALID_HEADER_ORDER = {"HD" : ("VN", "SO", "SS", "GO"),
"SQ" : ("SN", "LN", "AS", "M5",
"UR", "SP", "AH"),
"UR", "SP", "AH", "TP",
"DS", "AN"),
"RG" : ("ID", "CN", "SM", "LB",
"PU", "PI", "DT", "DS",
"PL", "FO", "KS", "PG",
"PM"),
"PM", "BC"),
"PG" : ("PN", "ID", "VN", "PP",
"DS", "CL"),}

Expand Down
4 changes: 2 additions & 2 deletions tests/AlignmentFileHeader_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ class TestHeaderConstruction(unittest.TestCase):

header_dict = odict(
[('SQ', [odict([('LN', 1575), ('SN', 'chr1'), ('AH', 'chr1:5000000-5010000')]),
odict([('LN', 1584), ('SN', 'chr2'), ('AH', '*')])]),
odict([('LN', 1584), ('SN', 'chr2'), ('AH', '*'), ('TP', 'linear')])]),
('RG', [odict([('LB', 'SC_1'), ('ID', 'L1'), ('SM', 'NA12891'),
('PU', 'SC_1_10'), ("CN", "name:with:colon")]),
odict([('LB', 'SC_2'), ('ID', 'L2'), ('SM', 'NA12891'),
Expand All @@ -37,7 +37,7 @@ class TestHeaderConstruction(unittest.TestCase):

header_text = ("@HD\tVN:1.0\n"
"@SQ\tSN:chr1\tLN:1575\tAH:chr1:5000000-5010000\n"
"@SQ\tSN:chr2\tLN:1584\tAH:*\n"
"@SQ\tSN:chr2\tLN:1584\tAH:*\tTP:linear\n"
"@RG\tID:L1\tPU:SC_1_10\tLB:SC_1\tSM:NA12891\tCN:name:with:colon\n"
"@RG\tID:L2\tPU:SC_2_12\tLB:SC_2\tSM:NA12891\tCN:name:with:colon\n"
"@PG\tID:P1\tVN:1.0\n"
Expand Down

0 comments on commit cdc0ed1

Please sign in to comment.