CCF
Loading...
Searching...
No Matches
contiguous_set.h
Go to the documentation of this file.
1// Copyright (c) Microsoft Corporation. All rights reserved.
2// Licensed under the Apache 2.0 License.
3#pragma once
4
5#define FMT_HEADER_ONLY
6#include <fmt/format.h>
7#include <numeric>
8#include <vector>
9
10namespace ccf::ds
11{
12 // Dense representation of an ordered set of values, assuming it contains
13 // some contiguous ranges of adjacent values. Stores a sequence of ranges,
14 // rather than individual values.
15 template <typename T>
17 {
18 public:
19 // Ranges are represented by their first value, and a count of additional
20 // values. This disallows negative ranges
21 using Range = std::pair<T, size_t>;
22 using Ranges = std::vector<Range>;
23
24 // Define an iterator for accessing each contained element, rather than the
25 // ranges
27 {
28 using iterator_category = std::random_access_iterator_tag;
29 using value_type = size_t;
30 using difference_type = size_t;
31 using pointer = const size_t*;
32 using reference = size_t;
33
34 using RangeIt = typename Ranges::const_iterator;
35
37 size_t offset = 0;
38
39 ConstIterator(RangeIt i, size_t o = 0) : it(i), offset(o) {}
40
41 T operator*() const
42 {
43 return it->first + offset;
44 }
45
46 bool operator==(const ConstIterator& other) const
47 {
48 return (it == other.it && offset == other.offset);
49 }
50
51 bool operator!=(const ConstIterator& other) const
52 {
53 return !(*this == other);
54 }
55
57 {
58 ++offset;
59 if (offset > it->second)
60 {
61 ++it;
62 offset = 0;
63 }
64 return (*this);
65 }
66
68 {
69 auto temp(*this);
70 ++(*this);
71 return temp;
72 }
73
75 {
76 if (offset == 0)
77 {
78 it = std::prev(it);
79 offset = it->second;
80 }
81 else
82 {
83 --offset;
84 }
85 return (*this);
86 }
87
89 {
90 auto temp(*this);
91 --(*this);
92 return temp;
93 }
94
96 {
97 if (n_ < 0)
98 {
99 return (*this) -= (size_t)-n_;
100 }
101 else
102 {
103 size_t n = n_;
104 while (offset + n > it->second)
105 {
106 n -= (it->second - offset + 1);
107 it = std::next(it);
108 offset = 0;
109 }
110 offset += n;
111 return (*this);
112 }
113 }
114
115 ConstIterator operator+(size_t n) const
116 {
117 ConstIterator copy(it, offset);
118 copy += n;
119 return copy;
120 }
121
122 friend ConstIterator operator+(size_t n, const ConstIterator& other)
123 {
124 return other + n;
125 }
126
128 {
129 while (n > offset)
130 {
131 n -= (offset + 1);
132 it = std::prev(it);
133 offset = it->second;
134 }
135 offset -= n;
136 return (*this);
137 }
138
139 ConstIterator operator-(size_t n) const
140 {
141 ConstIterator copy(it, offset);
142 copy -= n;
143 return copy;
144 }
145
147 {
148 if (it == other.it)
149 {
150 // In same range, simple diff
151 return offset - other.offset;
152 }
153 else if (it < other.it)
154 {
155 return -(other - (*this));
156 }
157 else
158 {
159 // it > other.it
160 // Walk from this->it to other.it, summing all of the ranges that are
161 // passed
162 difference_type sum = std::accumulate(
163 std::reverse_iterator(it),
164 std::prev(std::reverse_iterator(other.it)),
165 offset + 1,
166 [](difference_type acc, const auto& range) {
167 return acc + range.second + 1;
168 });
169 sum += other.it->second - other.offset;
170 return sum;
171 }
172 }
173 };
174
175 private:
176 Ranges ranges;
177
178 template <typename It>
179 void init_from_iterators(It begin, It end)
180 {
181 if (!std::is_sorted(begin, end))
182 {
183 throw std::logic_error("Range must be sorted");
184 }
185
186 while (begin != end)
187 {
188 auto next = std::adjacent_find(
189 begin, end, [](const T& a, const T& b) { return a + 1 != b; });
190 if (next == end)
191 {
192 ranges.emplace_back(*begin, size_t(std::distance(begin, end)) - 1);
193 break;
194 }
195 ranges.emplace_back(*begin, size_t(std::distance(begin, next)));
196 begin = std::next(next);
197 }
198 }
199
200 void init_from_iterators(
201 const ConstIterator& begin, const ConstIterator& end)
202 {
203 // If they're in different ranges...
204 if (begin.it != end.it)
205 {
206 // first insert the end of the initial range
207 ranges.emplace_back(
208 begin.it->first + begin.offset, begin.it->second - begin.offset);
209
210 // then insert all intermediate ranges, by direct copies
211 ranges.insert(ranges.end(), std::next(begin.it), end.it);
212
213 // finally handle the final range; insert part of it if it is non-empty
214 if (end.offset != 0)
215 {
216 ranges.emplace_back(end.it->first, end.offset - 1);
217 }
218 }
219 else
220 {
221 if (begin.offset < end.offset)
222 {
223 ranges.emplace_back(
224 begin.it->first + begin.offset, end.offset - begin.offset - 1);
225 }
226 }
227 }
228
229 void maybe_merge_with_following(typename Ranges::iterator it)
230 {
231 if (it != ranges.end())
232 {
233 auto next_it = std::next(it);
234 if (next_it != ranges.end())
235 {
236 if (it->first + it->second + 1 == next_it->first)
237 {
238 it->second = it->second + 1 + next_it->second;
239 ranges.erase(next_it);
240 }
241 }
242 }
243 }
244
245 void maybe_merge_with_following(typename Ranges::reverse_iterator it)
246 {
247 if (it != ranges.rend())
248 {
249 maybe_merge_with_following(std::next(it).base());
250 }
251 }
252
253 typename Ranges::const_iterator find_internal(const T& t) const
254 {
255 Range estimated_range{t, 0};
256 auto it = std::lower_bound(ranges.begin(), ranges.end(), estimated_range);
257 if (it != ranges.end())
258 {
259 // If lower_bound found {t, n}, then return that result
260 if (it->first == t)
261 {
262 return it;
263 }
264 }
265
266 // else, most of the time, we found {x, n}, where x > t. Check if there
267 // is a previous range, and if that contains t
268 if (it != ranges.begin())
269 {
270 it = std::prev(it);
271 const T& from = it->first;
272 const T additional = it->second;
273 if (from + additional >= t)
274 {
275 return it;
276 }
277 }
278
279 return ranges.end();
280 }
281
282 public:
283 ContiguousSet() = default;
284
285 template <typename It>
287 {
288 init_from_iterators(std::forward<It>(begin), std::forward<It>(end));
289 }
290
291 ContiguousSet(const T& from, size_t additional)
292 {
293 ranges.emplace_back(from, additional);
294 }
295
296 bool operator==(const ContiguousSet& other) const
297 {
298 return ranges == other.ranges;
299 }
300
301 bool operator!=(const ContiguousSet& other) const
302 {
303 return !(*this == other);
304 }
305
306 const Ranges& get_ranges() const
307 {
308 return ranges;
309 }
310
311 size_t size() const
312 {
313 return std::accumulate(
314 ranges.begin(), ranges.end(), 0u, [](size_t n, const Range& r) {
315 return n + r.second + 1;
316 });
317 }
318
319 bool empty() const
320 {
321 return ranges.empty();
322 }
323
324 bool insert(const T& t)
325 {
326 // Search backwards, to find the range with the highest starting point
327 // lower than this value. Offset by one, to find ranges adjacent to this
328 // value. eg - if inserting 5 into [{2, 1}, {6, 2}, {10, 2}], we want to
329 // find {6, 2}, and extend this range down by 1
330 const Range estimated_range(t + 1, 0);
331 auto it = std::lower_bound(
332 ranges.rbegin(), ranges.rend(), estimated_range, std::greater<>());
333
334 if (it != ranges.rend())
335 {
336 const T& from = it->first;
337 const T additional = it->second;
338 if (from <= t && t <= from + additional)
339 {
340 // Already present
341 return false;
342 }
343 else if (from + additional + 1 == t)
344 {
345 // Adjacent to the end of the existing range
346 it->second++;
347 maybe_merge_with_following(it);
348 return true;
349 }
350 else if (t + 1 == from)
351 {
352 // Precedes directly, extend this range by 1
353 it->first = t;
354 it->second++;
355 if (it != ranges.rend())
356 {
357 maybe_merge_with_following(std::next(it));
358 }
359 return true;
360 }
361 // Else fall through to emplace new entry
362 }
363
364 auto emplaced_it = ranges.emplace(it.base(), t, 0);
365 maybe_merge_with_following(emplaced_it);
366
367 return true;
368 }
369
370 bool erase(const T& t)
371 {
372 Range estimated_range{t, 0};
373 auto it = std::lower_bound(
374 ranges.begin(),
375 ranges.end(),
376 estimated_range,
377 // Custom comparator - ignore the second element
378 [](const Range& left, const Range& right) {
379 return left.first < right.first;
380 });
381
382 // Usually this has found the iterator _after_ the range containing t, and
383 // so we want std::prev(it). The only time when that is not the case are
384 // if it == ranges.begin() (there is no prev), or t == it->first()
385 // (lower_bound returned the correct iterator, because t is the start of a
386 // range). The latter must be additionally guarded by a check that we're
387 // not dereferencing an end iterator.
388 if (it != ranges.begin() && (it == ranges.end() || t != it->first))
389 {
390 it = std::prev(it);
391 }
392
393 if (it != ranges.end())
394 {
395 const T& from = it->first;
396 const T additional = it->second;
397 if (from <= t && t <= from + additional)
398 {
399 // Contained within this range
400 if (from == t)
401 {
402 if (additional == 0u)
403 {
404 // Remove range entirely
405 ranges.erase(it);
406 return true;
407 }
408 else
409 {
410 // Shrink start of range
411 ++it->first;
412 --it->second;
413 return true;
414 }
415 }
416 else if (t == from + additional)
417 {
418 // Shrink end of range
419 --it->second;
420 return true;
421 }
422 else
423 {
424 const auto before = t - it->first - 1;
425 const auto after = it->first + it->second - t - 1;
426
427 it->second = before;
428
429 auto next_it = std::next(it);
430 ranges.emplace(next_it, t + 1, after);
431 return true;
432 }
433 }
434 }
435
436 return false;
437 }
438
439 void extend(const T& from, size_t additional)
440 {
441 for (auto n = from; n <= from + additional; ++n)
442 {
443 const auto b = insert(n);
444 }
445 }
446
447 bool contains(const T& t) const
448 {
449 return find_internal(t) != end();
450 }
451
452 ConstIterator find(const T& t) const
453 {
454 auto it = find_internal(t);
455 if (it != ranges.end())
456 {
457 return ConstIterator(it, t - it->first);
458 }
459
460 return end();
461 }
462
463 ConstIterator lower_bound(const T& t) const
464 {
465 return std::lower_bound(begin(), end(), t);
466 }
467
468 ConstIterator upper_bound(const T& t) const
469 {
470 return std::upper_bound(begin(), end(), t);
471 }
472
473 void clear()
474 {
475 ranges.clear();
476 }
477
478 T front() const
479 {
480 return ranges.front().first;
481 }
482
483 T back() const
484 {
485 const auto back = ranges.back();
486 return back.first + back.second;
487 }
488
490 {
491 return ConstIterator(ranges.begin());
492 }
493
495 {
496 return ConstIterator(ranges.end());
497 }
498 };
499}
500
501FMT_BEGIN_NAMESPACE
502template <typename T>
503struct formatter<ccf::ds::ContiguousSet<T>>
504{
505 template <typename ParseContext>
506 constexpr auto parse(ParseContext& ctx)
507 {
508 return ctx.begin();
509 }
510
511 template <typename FormatContext>
512 auto format(const ccf::ds::ContiguousSet<T>& v, FormatContext& ctx) const
513 {
514 std::vector<std::string> ranges;
515 for (const auto& [from, additional] : v.get_ranges())
516 {
517 ranges.emplace_back(fmt::format("[{}->{}]", from, from + additional));
518 }
519 return format_to(
520 ctx.out(),
521 "{{{} values in {} ranges: {}}}",
522 v.size(),
523 v.get_ranges().size(),
524 fmt::join(ranges, ", "));
525 }
526};
527FMT_END_NAMESPACE
Definition contiguous_set.h:17
ConstIterator end() const
Definition contiguous_set.h:494
ContiguousSet(const T &from, size_t additional)
Definition contiguous_set.h:291
T back() const
Definition contiguous_set.h:483
bool erase(const T &t)
Definition contiguous_set.h:370
void extend(const T &from, size_t additional)
Definition contiguous_set.h:439
ContiguousSet(It &&begin, It &&end)
Definition contiguous_set.h:286
ConstIterator find(const T &t) const
Definition contiguous_set.h:452
bool insert(const T &t)
Definition contiguous_set.h:324
bool empty() const
Definition contiguous_set.h:319
T front() const
Definition contiguous_set.h:478
ConstIterator begin() const
Definition contiguous_set.h:489
bool operator==(const ContiguousSet &other) const
Definition contiguous_set.h:296
bool operator!=(const ContiguousSet &other) const
Definition contiguous_set.h:301
size_t size() const
Definition contiguous_set.h:311
bool contains(const T &t) const
Definition contiguous_set.h:447
const Ranges & get_ranges() const
Definition contiguous_set.h:306
std::pair< T, size_t > Range
Definition contiguous_set.h:21
ConstIterator lower_bound(const T &t) const
Definition contiguous_set.h:463
std::vector< Range > Ranges
Definition contiguous_set.h:22
ConstIterator upper_bound(const T &t) const
Definition contiguous_set.h:468
void clear()
Definition contiguous_set.h:473
Definition contiguous_set.h:11
Definition app_interface.h:15
Definition contiguous_set.h:27
ConstIterator & operator-=(size_t n)
Definition contiguous_set.h:127
size_t reference
Definition contiguous_set.h:32
friend ConstIterator operator+(size_t n, const ConstIterator &other)
Definition contiguous_set.h:122
ConstIterator(RangeIt i, size_t o=0)
Definition contiguous_set.h:39
ConstIterator operator+(size_t n) const
Definition contiguous_set.h:115
size_t difference_type
Definition contiguous_set.h:30
RangeIt it
Definition contiguous_set.h:36
ConstIterator operator++(int)
Definition contiguous_set.h:67
size_t value_type
Definition contiguous_set.h:29
const size_t * pointer
Definition contiguous_set.h:31
ConstIterator & operator--()
Definition contiguous_set.h:74
ConstIterator operator--(int)
Definition contiguous_set.h:88
T operator*() const
Definition contiguous_set.h:41
std::random_access_iterator_tag iterator_category
Definition contiguous_set.h:28
ConstIterator operator-(size_t n) const
Definition contiguous_set.h:139
ConstIterator & operator++()
Definition contiguous_set.h:56
size_t offset
Definition contiguous_set.h:37
bool operator==(const ConstIterator &other) const
Definition contiguous_set.h:46
ConstIterator & operator+=(difference_type n_)
Definition contiguous_set.h:95
typename Ranges::const_iterator RangeIt
Definition contiguous_set.h:34
difference_type operator-(const ConstIterator &other) const
Definition contiguous_set.h:146
bool operator!=(const ConstIterator &other) const
Definition contiguous_set.h:51
constexpr auto parse(ParseContext &ctx)
Definition contiguous_set.h:506
auto format(const ccf::ds::ContiguousSet< T > &v, FormatContext &ctx) const
Definition contiguous_set.h:512