2929
3030namespace km {
3131
/// Selects which of the two per-abundance histograms an operation refers to.
enum class KHistType { UNIQUE, TOTAL };

/// Abundance histogram over the closed range [lower, upper].
///
/// Every call to inc(count) is recorded twice:
///  - in the UNIQUE histogram, the bin for `count` is incremented by one;
///  - in the TOTAL histogram, the bin for `count` is incremented by `count`.
/// Counts outside [lower, upper] are accumulated in separate out-of-bounds
/// counters instead of the bins.
///
/// The class performs no locking. clone() hands out independent histograms
/// with the same bounds that can later be folded back with merge_clones().
class KHist
{
  template <size_t buf_size>
  friend class HistWriter;

  template <size_t buf_size>
  friend class HistReader;

public:
  /// Builds an empty histogram with no bins (all members zero-initialized).
  KHist () = default;

  /// @param idx    identifier stored as-is and returned by idx().
  /// @param ksize  value stored as-is and returned by kmer_size().
  /// @param lower  smallest count that gets its own bin.
  /// @param upper  largest count that gets its own bin.
  /// Allocates `upper - lower + 1` zeroed bins in both histograms.
  /// NOTE(review): `upper < lower` wraps around in unsigned arithmetic; callers
  /// are presumably expected to pass lower <= upper - confirm.
  KHist (int idx, size_t ksize, size_t lower, size_t upper)
    : m_idx (idx), m_ksize (ksize), m_lower (lower), m_upper (upper)
  {
    const size_t nb_bins = m_upper - m_lower + 1;
    m_hist_u.resize (nb_bins, 0);
    m_hist_n.resize (nb_bins, 0);
  }

  /// Records one observation whose abundance is `count`.
  void inc (uint64_t count)
  {
    m_uniq++;
    m_total += count;
    if (count < m_lower)
    {
      m_oob_lu++;
      m_oob_ln += count;
    }
    else if (count > m_upper)
    {
      m_oob_uu++;
      m_oob_un += count;
    }
    else
    {
      m_hist_u[count - m_lower]++;
      m_hist_n[count - m_lower] += count;
    }
  }

  /// Chooses the histogram used by begin()/end()/cbegin()/cend() and as_string().
  void set_type (KHistType type)
  {
    m_type = type;
  }

  /// Creates an empty histogram with the same idx, ksize and bounds, keeps a
  /// shared reference to it, and returns it. Its content is added to this
  /// object by merge_clones().
  std::shared_ptr<KHist> clone ()
  {
    m_clone = true;
    std::shared_ptr<KHist> hist = std::make_shared<KHist>(m_idx, m_ksize, m_lower, m_upper);
    m_clones.push_back (hist);
    return hist;
  }

  uint64_t unique () const { return m_uniq; }   ///< number of inc() calls
  uint64_t total () const { return m_total; }   ///< sum of all counts passed to inc()
  uint64_t lower () const { return m_lower; }   ///< lower bound of the binned range
  uint64_t upper () const { return m_upper; }   ///< upper bound of the binned range

  uint64_t oob_lower_unique () const { return m_oob_lu; }  ///< inc() calls with count < lower
  uint64_t oob_upper_unique () const { return m_oob_uu; }  ///< inc() calls with count > upper
  uint64_t oob_lower_total () const { return m_oob_ln; }   ///< sum of counts < lower
  uint64_t oob_upper_total () const { return m_oob_un; }   ///< sum of counts > upper

  uint32_t kmer_size () const { return m_ksize; }
  uint32_t idx () const { return m_idx; }

  /// Returns the bin value associated with abundance `c` in the requested
  /// histogram, or -1 when `c` lies outside [lower, upper].
  int64_t get_count (size_t c, KHistType type) const
  {
    if ((c < m_lower) || (c > m_upper))
      return -1;
    // Bins are stored relative to m_lower (see inc()), so shift before indexing.
    const size_t bin = c - m_lower;
    const std::vector<uint64_t>& hist = (type == KHistType::UNIQUE) ? m_hist_u : m_hist_n;
    return static_cast<int64_t>(hist[bin]);
  }

  /// Read-only access to the bins of the requested histogram; element i
  /// corresponds to abundance `lower + i`.
  const std::vector<uint64_t>& get_vec (KHistType type = KHistType::UNIQUE) const
  {
    if (type == KHistType::UNIQUE)
      return m_hist_u;
    return m_hist_n;
  }

  /// Adds the counters and bins of every histogram created through clone()
  /// to this object, then forgets the clones. Does nothing when no clone is
  /// pending.
  void merge_clones ()
  {
    if (m_clone && !m_merged)
    {
      for (const auto& h : m_clones)
      {
        m_uniq += h->m_uniq;
        m_total += h->m_total;
        m_oob_lu += h->m_oob_lu;
        m_oob_uu += h->m_oob_uu;
        m_oob_ln += h->m_oob_ln;
        m_oob_un += h->m_oob_un;
        for (size_t i = 0; i < h->m_hist_u.size (); i++)
        {
          m_hist_u[i] += h->m_hist_u[i];
          m_hist_n[i] += h->m_hist_n[i];
        }
      }
      m_merged = true;
      clear_clones ();
    }
  }

  /// Drops the references to all clones and resets the clone/merge flags.
  /// Clones still held by callers stay alive but are no longer merged.
  void clear_clones ()
  {
    m_clones.clear ();
    m_clone = false;
    m_merged = false;
  }

  /// Serializes the histogram selected by set_type() as "<bin> <value><sep>"
  /// records, one per bin, with <bin> starting at 0.
  ///
  /// NOTE(review): the `type` argument is not consulted; the histogram is
  /// chosen from the type set via set_type(). Kept as-is because callers may
  /// rely on it - confirm the intended behavior.
  /// NOTE(review): bin labels start at 0, not at lower() - confirm.
  std::string as_string (KHistType type = KHistType::UNIQUE, const std::string& sep = "\n") const
  {
    // Bind by reference: the bins only need to be read, not copied.
    const std::vector<uint64_t>& vec = (m_type == KHistType::UNIQUE) ? m_hist_u : m_hist_n;
    std::string out;
    uint64_t bin = 0;
    for (const uint64_t value : vec)
    {
      out += std::to_string (bin);
      out += " ";
      out += std::to_string (value);
      out += sep;
      bin++;
    }
    return out;
  }

  /// Writes as_string(type, sep) to `output_stream`.
  void print (std::ostream& output_stream, KHistType type = KHistType::UNIQUE, const std::string& sep = "\n") const
  {
    output_stream << as_string (type, sep);
  }

  /// Mutable iterator to the first bin of the histogram selected by set_type().
  auto begin ()
  {
    if (m_type == KHistType::UNIQUE)
      return m_hist_u.begin ();
    return m_hist_n.begin ();
  }

  /// Mutable past-the-end iterator of the histogram selected by set_type().
  auto end ()
  {
    if (m_type == KHistType::UNIQUE)
      return m_hist_u.end ();
    return m_hist_n.end ();
  }

  /// Const iterator to the first bin of the histogram selected by set_type().
  auto cbegin () const
  {
    if (m_type == KHistType::UNIQUE)
      return m_hist_u.cbegin ();
    return m_hist_n.cbegin ();
  }

  /// Const past-the-end iterator; const-qualified so it pairs with cbegin()
  /// on const objects.
  auto cend () const
  {
    if (m_type == KHistType::UNIQUE)
      return m_hist_u.cend ();
    return m_hist_n.cend ();
  }

private:
  int32_t  m_idx {0};
  uint32_t m_ksize {0};
  uint64_t m_lower {0};
  uint64_t m_upper {0};
  uint64_t m_uniq {0};     // number of inc() calls
  uint64_t m_total {0};    // sum of counts passed to inc()
  uint64_t m_oob_lu {0};   // out-of-bounds, below lower: number of calls
  uint64_t m_oob_uu {0};   // out-of-bounds, above upper: number of calls
  uint64_t m_oob_ln {0};   // out-of-bounds, below lower: sum of counts
  uint64_t m_oob_un {0};   // out-of-bounds, above upper: sum of counts

  std::vector<uint64_t> m_hist_u;                // UNIQUE bins, index = count - m_lower
  std::vector<uint64_t> m_hist_n;                // TOTAL bins, index = count - m_lower
  std::vector<std::shared_ptr<KHist>> m_clones;  // histograms handed out by clone()
  KHistType m_type {KHistType::UNIQUE};
  bool m_clone {false};
  bool m_merged {false};
};
101208
102209using hist_t = std::shared_ptr<KHist>;
@@ -109,15 +216,16 @@ inline std::vector<uint32_t> compute_merge_thresholds(std::vector<hist_t>& histo
109216 for (size_t h=0 ; h<histograms.size (); h++)
110217 {
111218 uint32_t sum = 0 ;
112- uint32_t n = histograms[h]->uniq * p;
113- for (size_t i=0 ; i<histograms[h]->hist_u .size (); i++)
219+ uint32_t n = histograms[h]->unique () * p;
220+ auto v = histograms[h]->get_vec (KHistType::UNIQUE);
221+ for (size_t i=0 ; i<v.size (); i++)
114222 {
115223 if (sum > n)
116224 {
117225 thresholds.push_back (i);
118226 break ;
119227 }
120- sum += histograms[h]-> hist_u [i];
228+ sum += v [i];
121229 }
122230 }
123231 std::ofstream out (path, std::ios::out); check_fstream_good (path, out);
@@ -128,4 +236,4 @@ inline std::vector<uint32_t> compute_merge_thresholds(std::vector<hist_t>& histo
128236 return thresholds;
129237}
130238
131- };
239+ };
0 commit comments