-
Notifications
You must be signed in to change notification settings - Fork 0
/
pathmapper.h
153 lines (120 loc) · 4.18 KB
/
pathmapper.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
/*
* Copyright (C) 2015 by Glenn Hickey (hickey@soe.ucsc.edu)
*
* Released under the MIT license, see LICENSE.cactus
*/
#ifndef _PATHMAPPER_H
#define _PATHMAPPER_H
#include <string>
#include <map>
#include <vector>
#include "vglight.h"
#include "sglookup.h"
#include "sidegraph.h"
/**
 * Iteratively maps VG paths onto a Side Graph.
 *
 * Paths are handed in one at a time through addPath(); the order in which
 * they are added is left to the calling code.
 *
 * NOTE(review): _sg, _lookup and _curSeq are raw pointers, and the class
 * declares a destructor but no copy constructor or assignment operator.
 * Whether the destructor releases any of them is not visible from this
 * header -- if it does, copying a PathMapper would be unsafe.  Confirm
 * against the implementation file.
 */
class PathMapper
{
public:

   PathMapper();
   ~PathMapper();

   /** Load the VG graph. */
   void init(const VGLight* vg);

   /** Return the Side Graph that was built. */
   const SideGraph* getSideGraph() const;

   /**
    * Fetch a chunk of DNA sequence from the Side Graph.
    *
    * Defaults: offset = 0, length = -1, reversed = false.
    * NOTE(review): length = -1 presumably means "through the end of the
    * sequence" -- confirm against the implementation.
    */
   std::string getSideGraphDNA(sg_int_t seqID, sg_int_t offset = 0,
                               sg_int_t length = -1,
                               bool reversed = false) const;

   /** Return the DNA sequence of a path, as spelled in the *Side Graph*. */
   std::string getSideGraphPathDNA(const std::string& pathName) const;

   /** Return the Side Graph path that corresponds to a previously added
    * VG path. */
   const std::vector<SGSegment>& getSideGraphPath(
      const std::string& vgPathName) const;

   /** Return the name of the VG path from which a Side Graph sequence
    * was derived. */
   const std::string& getVGPathName(const SGSequence* seq) const;

   /** Add a path by name.  Control over the order of addition is left to
    * the calling code. */
   void addPath(const std::string& name,
                const VGLight::MappingList& mappings);

   /** Add a set of paths that span the edges not covered by existing
    * paths. */
   void addSpanningPaths();

   /** Was this path created by addSpanningPaths()?  If so, it probably
    * should not be written out. */
   bool isSpanningPath(sg_int_t id) const;

   /** Return the name of a path; the id is the order in which the VG
    * path was added. */
   const std::string& getPathName(sg_int_t id) const;

   /** Number of paths that were added using addPath. */
   size_t getNumPaths() const;

   /** Throw an exception if a Side Graph path's DNA does not agree with
    * the DNA of its VG path. */
   void verifyPaths() const;

protected:

   /** Add a segment corresponding to an input node. */
   void addSegment(sg_int_t pathID, sg_int_t pathPos,
                   const vg::Position& pos, bool reversed,
                   sg_int_t segLength);

   /** Second pass over an input path: compute the joins and the Side
    * Graph segments. */
   void addPathJoins(const std::string& name,
                     const VGLight::MappingList& mappings);

   /** Append path onto the end of prevPath, merging the last segment of
    * prevPath with the first segment of path when possible. */
   void mergePaths(std::vector<SGSegment>& prevPath,
                   const std::vector<SGSegment>& path) const;

   /** Build a sequence name from a path id and a position on that path.
    * NOTE(review): the exact name format is defined in the
    * implementation file. */
   std::string makeSeqName(sg_int_t pathID, sg_int_t pathPos);

   /** Map a path name to its id; the name must be present in _pathIDs. */
   sg_int_t getPathID(const std::string& name) const;

   /** Make a unique spanning path name. */
   std::string getSpanningPathName() const;

   SideGraph* _sg;
   SGLookup* _lookup;
   const VGLight* _vg;

   // path names, indexed by path id (see getPathName)
   std::vector<std::string> _pathNames;
   // path name -> path id (see getPathID)
   std::map<std::string, sg_int_t> _pathIDs;
   std::vector<std::string> _seqStrings;
   std::vector<std::vector<SGSegment> > _sgPaths;

   // make sure node ids are in range [0, numNodes)
   // note to self: some of the maps should be hash tables
   std::map<int64_t, sg_int_t> _nodeIDMap;
   SGSequence* _curSeq;
   std::vector<sg_int_t> _sgSeqToVGPathID;
   // keyed by path id; membership is what isSpanningPath() tests
   std::map<sg_int_t, VGLight::MappingList> _spanningPaths;
};
/** Hand back the stored side graph pointer as-is (read-only view; no
 * null check is performed here). */
inline const SideGraph* PathMapper::getSideGraph() const
{
   return this->_sg;
}
/**
 * Look up the name of a path by its numeric id.
 *
 * @param id index into the stored list of path names; the caller must
 *           guarantee 0 <= id < getNumPaths().
 * @return reference to the stored name.
 *
 * The precondition is enforced with assert() only, so it is not checked
 * in builds where NDEBUG is defined.
 */
inline const std::string& PathMapper::getPathName(sg_int_t id) const
{
   assert(id >= 0);
   // cast the size so both operands are sg_int_t; this avoids a mixed
   // signed/unsigned comparison when sg_int_t is a signed type
   assert(id < static_cast<sg_int_t>(_pathNames.size()));
   return _pathNames[id];
}
/** Count of the path names currently stored in _pathNames. */
inline size_t PathMapper::getNumPaths() const
{
   const size_t numPaths = _pathNames.size();
   return numPaths;
}
/**
 * Translate a path name into its id via the _pathIDs map.
 *
 * The name must already be present: a missing name trips the assert, and
 * in builds with NDEBUG defined the end iterator would be dereferenced.
 */
inline sg_int_t PathMapper::getPathID(const std::string& name) const
{
   typedef std::map<std::string, sg_int_t>::const_iterator PathIDIterator;

   const PathIDIterator found = _pathIDs.find(name);
   assert(found != _pathIDs.end());
   return found->second;
}
/** True when the given path id has an entry in _spanningPaths. */
inline bool PathMapper::isSpanningPath(sg_int_t id) const
{
   // keys in a std::map are unique, so count() is either 0 or 1
   return _spanningPaths.count(id) > 0;
}
inline std::ostream& operator<<(std::ostream& os,
const std::vector<SGSegment>& segs)
{
os << "(";
for (int i = 0; i < segs.size(); ++i)
{
os << segs[i] << (i < segs.size() - 1 ? "," : ")");
}
return os;
}
#endif