CCF
Loading...
Searching...
No Matches
contiguous_set.h
Go to the documentation of this file.
1// Copyright (c) Microsoft Corporation. All rights reserved.
2// Licensed under the Apache 2.0 License.
3#pragma once
4
5#define FMT_HEADER_ONLY
6#include <fmt/format.h>
7#include <fmt/ranges.h>
8#include <numeric>
9#include <utility>
10#include <vector>
11
12namespace ccf::ds
13{
14 // Dense representation of an ordered set of values, assuming it contains
15 // some contiguous ranges of adjacent values. Stores a sequence of ranges,
16 // rather than individual values.
17 template <typename T>
19 {
20 public:
21 // Ranges are represented by their first value, and a count of additional
22 // values. This disallows negative ranges
23 using Range = std::pair<T, size_t>;
24 using Ranges = std::vector<Range>;
25
26 // Define an iterator for accessing each contained element, rather than the
27 // ranges
29 {
30 using iterator_category = std::random_access_iterator_tag;
31 using value_type = size_t;
32 using difference_type = ptrdiff_t;
33 using pointer = const size_t*;
34 using reference = size_t;
35
36 using RangeIt = typename Ranges::const_iterator;
37
39 size_t offset = 0;
40
41 ConstIterator(RangeIt i, size_t o = 0) : it(std::move(i)), offset(o) {}
42
43 T operator*() const
44 {
45 return it->first + offset;
46 }
47
48 bool operator==(const ConstIterator& other) const
49 {
50 return (it == other.it && offset == other.offset);
51 }
52
53 bool operator!=(const ConstIterator& other) const
54 {
55 return !(*this == other);
56 }
57
59 {
60 ++offset;
61 if (offset > it->second)
62 {
63 ++it;
64 offset = 0;
65 }
66 return (*this);
67 }
68
70 {
71 auto temp(*this);
72 ++(*this);
73 return temp;
74 }
75
77 {
78 if (offset == 0)
79 {
80 it = std::prev(it);
81 offset = it->second;
82 }
83 else
84 {
85 --offset;
86 }
87 return (*this);
88 }
89
91 {
92 auto temp(*this);
93 --(*this);
94 return temp;
95 }
96
98 {
99 if (n_ < 0)
100 {
101 // Avoid signed overflow: compute magnitude safely via unsigned
102 // arithmetic (two's complement negation).
103 return (*this) -= static_cast<size_t>(-(n_ + 1)) + 1;
104 }
105 auto n = static_cast<size_t>(n_);
106 while (offset + n > it->second)
107 {
108 n -= (it->second - offset + 1);
109 it = std::next(it);
110 offset = 0;
111 }
112 offset += n;
113 return (*this);
114 }
115
117 {
118 ConstIterator copy(it, offset);
119 copy += n;
120 return copy;
121 }
122
124 difference_type n, const ConstIterator& other)
125 {
126 return other + n;
127 }
128
130 {
131 while (n > offset)
132 {
133 n -= (offset + 1);
134 it = std::prev(it);
135 offset = it->second;
136 }
137 offset -= n;
138 return (*this);
139 }
140
142 {
143 ConstIterator copy(it, offset);
144 copy += -n;
145 return copy;
146 }
147
149 {
150 if (it == other.it)
151 {
152 // In same range, simple diff
153 return static_cast<difference_type>(offset) -
154 static_cast<difference_type>(other.offset);
155 }
156
157 if (it < other.it)
158 {
159 return -(other - (*this));
160 }
161
162 // it > other.it
163 // Walk from this->it to other.it, summing all of the ranges that are
164 // passed
165 size_t sum = std::accumulate(
166 std::reverse_iterator(it),
167 std::prev(std::reverse_iterator(other.it)),
168 offset + 1,
169 [](size_t acc, const auto& range) { return acc + range.second + 1; });
170 sum += other.it->second - other.offset;
171 return static_cast<difference_type>(sum);
172 }
173
174 bool operator<(const ConstIterator& other) const
175 {
176 if (it != other.it)
177 {
178 return it < other.it;
179 }
180 return offset < other.offset;
181 }
182
183 bool operator<=(const ConstIterator& other) const
184 {
185 return !(other < *this);
186 }
187
188 bool operator>(const ConstIterator& other) const
189 {
190 return other < *this;
191 }
192
193 bool operator>=(const ConstIterator& other) const
194 {
195 return !(*this < other);
196 }
197 };
198
199 private:
200 Ranges ranges;
201
202 template <typename It>
203 void init_from_iterators(It begin, It end)
204 {
205 if (!std::is_sorted(begin, end))
206 {
207 throw std::logic_error("Range must be sorted");
208 }
209
210 while (begin != end)
211 {
212 auto next = std::adjacent_find(
213 begin, end, [](const T& a, const T& b) { return a + 1 != b; });
214 if (next == end)
215 {
216 ranges.emplace_back(*begin, size_t(std::distance(begin, end)) - 1);
217 break;
218 }
219 ranges.emplace_back(*begin, size_t(std::distance(begin, next)));
220 begin = std::next(next);
221 }
222 }
223
224 void init_from_iterators(
225 const ConstIterator& begin, const ConstIterator& end)
226 {
227 // If they're in different ranges...
228 if (begin.it != end.it)
229 {
230 // first insert the end of the initial range
231 ranges.emplace_back(
232 begin.it->first + begin.offset, begin.it->second - begin.offset);
233
234 // then insert all intermediate ranges, by direct copies
235 ranges.insert(ranges.end(), std::next(begin.it), end.it);
236
237 // finally handle the final range; insert part of it if it is non-empty
238 if (end.offset != 0)
239 {
240 ranges.emplace_back(end.it->first, end.offset - 1);
241 }
242 }
243 else
244 {
245 if (begin.offset < end.offset)
246 {
247 ranges.emplace_back(
248 begin.it->first + begin.offset, end.offset - begin.offset - 1);
249 }
250 }
251 }
252
253 void maybe_merge_with_following(typename Ranges::iterator it)
254 {
255 if (it != ranges.end())
256 {
257 auto next_it = std::next(it);
258 if (next_it != ranges.end())
259 {
260 if (it->first + it->second + 1 == next_it->first)
261 {
262 it->second = it->second + 1 + next_it->second;
263 ranges.erase(next_it);
264 }
265 }
266 }
267 }
268
269 void maybe_merge_with_following(typename Ranges::reverse_iterator it)
270 {
271 if (it != ranges.rend())
272 {
273 maybe_merge_with_following(std::next(it).base());
274 }
275 }
276
277 [[nodiscard]] typename Ranges::const_iterator find_internal(
278 const T& t) const
279 {
280 Range estimated_range{t, 0};
281 auto it = std::lower_bound(ranges.begin(), ranges.end(), estimated_range);
282 if (it != ranges.end())
283 {
284 // If lower_bound found {t, n}, then return that result
285 if (it->first == t)
286 {
287 return it;
288 }
289 }
290
291 // else, most of the time, we found {x, n}, where x > t. Check if there
292 // is a previous range, and if that contains t
293 if (it != ranges.begin())
294 {
295 it = std::prev(it);
296 const T& from = it->first;
297 const T additional = it->second;
298 if (from + additional >= t)
299 {
300 return it;
301 }
302 }
303
304 return ranges.end();
305 }
306
307 public:
308 ContiguousSet() = default;
309
310 template <typename It>
312 {
313 init_from_iterators(std::forward<It>(begin), std::forward<It>(end));
314 }
315
316 ContiguousSet(const T& from, size_t additional)
317 {
318 ranges.emplace_back(from, additional);
319 }
320
321 bool operator==(const ContiguousSet& other) const
322 {
323 return ranges == other.ranges;
324 }
325
326 bool operator!=(const ContiguousSet& other) const
327 {
328 return !(*this == other);
329 }
330
331 [[nodiscard]] const Ranges& get_ranges() const
332 {
333 return ranges;
334 }
335
336 [[nodiscard]] size_t size() const
337 {
338 return std::accumulate(
339 ranges.begin(), ranges.end(), 0u, [](size_t n, const Range& r) {
340 return n + r.second + 1;
341 });
342 }
343
344 [[nodiscard]] bool empty() const
345 {
346 return ranges.empty();
347 }
348
349 bool insert(const T& t)
350 {
351 // Search backwards, to find the range with the highest starting point
352 // lower than this value. Offset by one, to find ranges adjacent to this
353 // value. eg - if inserting 5 into [{2, 1}, {6, 2}, {10, 2}], we want to
354 // find {6, 2}, and extend this range down by 1
355 const Range estimated_range(t + 1, 0);
356 auto it = std::lower_bound(
357 ranges.rbegin(), ranges.rend(), estimated_range, std::greater<>());
358
359 if (it != ranges.rend())
360 {
361 const T& from = it->first;
362 const T additional = it->second;
363 if (from <= t && t <= from + additional)
364 {
365 // Already present
366 return false;
367 }
368
369 if (from + additional + 1 == t)
370 {
371 // Adjacent to the end of the existing range
372 it->second++;
373 maybe_merge_with_following(it);
374 return true;
375 }
376
377 if (t + 1 == from)
378 {
379 // Precedes directly, extend this range by 1
380 it->first = t;
381 it->second++;
382 if (it != ranges.rend())
383 {
384 maybe_merge_with_following(std::next(it));
385 }
386 return true;
387 }
388 // Else fall through to emplace new entry
389 }
390
391 auto emplaced_it = ranges.emplace(it.base(), t, 0);
392 maybe_merge_with_following(emplaced_it);
393
394 return true;
395 }
396
397 bool erase(const T& t)
398 {
399 Range estimated_range{t, 0};
400 auto it = std::lower_bound(
401 ranges.begin(),
402 ranges.end(),
403 estimated_range,
404 // Custom comparator - ignore the second element
405 [](const Range& left, const Range& right) {
406 return left.first < right.first;
407 });
408
409 // Usually this has found the iterator _after_ the range containing t, and
410 // so we want std::prev(it). The only time when that is not the case are
411 // if it == ranges.begin() (there is no prev), or t == it->first()
412 // (lower_bound returned the correct iterator, because t is the start of a
413 // range). The latter must be additionally guarded by a check that we're
414 // not dereferencing an end iterator.
415 if (it != ranges.begin() && (it == ranges.end() || t != it->first))
416 {
417 it = std::prev(it);
418 }
419
420 if (it != ranges.end())
421 {
422 const T& from = it->first;
423 const T additional = it->second;
424 if (from <= t && t <= from + additional)
425 {
426 // Contained within this range
427 if (from == t)
428 {
429 if (additional == 0u)
430 {
431 // Remove range entirely
432 ranges.erase(it);
433 return true;
434 }
435 // Shrink start of range
436 ++it->first;
437 --it->second;
438 return true;
439 }
440
441 if (t == from + additional)
442 {
443 // Shrink end of range
444 --it->second;
445 return true;
446 }
447
448 const auto before = t - it->first - 1;
449 const auto after = it->first + it->second - t - 1;
450
451 it->second = before;
452
453 auto next_it = std::next(it);
454 ranges.emplace(next_it, t + 1, after);
455 return true;
456 }
457 }
458
459 return false;
460 }
461
462 void extend(const T& from, size_t additional)
463 {
464 for (auto n = from; n <= from + additional; ++n)
465 {
466 const auto b = insert(n);
467 }
468 }
469
470 [[nodiscard]] bool contains(const T& t) const
471 {
472 return find_internal(t) != end().it;
473 }
474
475 [[nodiscard]] ConstIterator find(const T& t) const
476 {
477 auto it = find_internal(t);
478 if (it != ranges.end())
479 {
480 return ConstIterator(it, t - it->first);
481 }
482
483 return end();
484 }
485
486 [[nodiscard]] ConstIterator lower_bound(const T& t) const
487 {
488 return std::lower_bound(begin(), end(), t);
489 }
490
491 [[nodiscard]] ConstIterator upper_bound(const T& t) const
492 {
493 return std::upper_bound(begin(), end(), t);
494 }
495
496 void clear()
497 {
498 ranges.clear();
499 }
500
501 [[nodiscard]] T front() const
502 {
503 return ranges.front().first;
504 }
505
506 [[nodiscard]] T back() const
507 {
508 const auto back = ranges.back();
509 return back.first + back.second;
510 }
511
512 [[nodiscard]] ConstIterator begin() const
513 {
514 return ConstIterator(ranges.begin());
515 }
516
517 [[nodiscard]] ConstIterator end() const
518 {
519 return ConstIterator(ranges.end());
520 }
521 };
522}
523
524FMT_BEGIN_NAMESPACE
525template <typename T>
526struct formatter<ccf::ds::ContiguousSet<T>>
527{
528 template <typename ParseContext>
529 constexpr auto parse(ParseContext& ctx)
530 {
531 return ctx.begin();
532 }
533
534 template <typename FormatContext>
535 auto format(const ccf::ds::ContiguousSet<T>& v, FormatContext& ctx) const
536 {
537 std::vector<std::string> ranges;
538 for (const auto& [from, additional] : v.get_ranges())
539 {
540 ranges.emplace_back(fmt::format("[{}->{}]", from, from + additional));
541 }
542 return format_to(
543 ctx.out(),
544 "{{{} values in {} ranges: {}}}",
545 v.size(),
546 v.get_ranges().size(),
547 fmt::join(ranges, ", "));
548 }
549};
550FMT_END_NAMESPACE
Definition contiguous_set.h:19
ConstIterator end() const
Definition contiguous_set.h:517
ContiguousSet(const T &from, size_t additional)
Definition contiguous_set.h:316
T back() const
Definition contiguous_set.h:506
bool erase(const T &t)
Definition contiguous_set.h:397
void extend(const T &from, size_t additional)
Definition contiguous_set.h:462
ContiguousSet(It &&begin, It &&end)
Definition contiguous_set.h:311
ConstIterator find(const T &t) const
Definition contiguous_set.h:475
bool insert(const T &t)
Definition contiguous_set.h:349
bool empty() const
Definition contiguous_set.h:344
T front() const
Definition contiguous_set.h:501
ConstIterator begin() const
Definition contiguous_set.h:512
bool operator==(const ContiguousSet &other) const
Definition contiguous_set.h:321
bool operator!=(const ContiguousSet &other) const
Definition contiguous_set.h:326
size_t size() const
Definition contiguous_set.h:336
bool contains(const T &t) const
Definition contiguous_set.h:470
const Ranges & get_ranges() const
Definition contiguous_set.h:331
std::pair< T, size_t > Range
Definition contiguous_set.h:23
ConstIterator lower_bound(const T &t) const
Definition contiguous_set.h:486
std::vector< Range > Ranges
Definition contiguous_set.h:24
ConstIterator upper_bound(const T &t) const
Definition contiguous_set.h:491
void clear()
Definition contiguous_set.h:496
Definition contiguous_set.h:13
Definition app_interface.h:13
STL namespace.
Definition contiguous_set.h:29
ConstIterator operator-(difference_type n) const
Definition contiguous_set.h:141
bool operator<=(const ConstIterator &other) const
Definition contiguous_set.h:183
ConstIterator operator+(difference_type n) const
Definition contiguous_set.h:116
ConstIterator & operator-=(size_t n)
Definition contiguous_set.h:129
size_t reference
Definition contiguous_set.h:34
friend ConstIterator operator+(difference_type n, const ConstIterator &other)
Definition contiguous_set.h:123
ConstIterator(RangeIt i, size_t o=0)
Definition contiguous_set.h:41
bool operator<(const ConstIterator &other) const
Definition contiguous_set.h:174
ptrdiff_t difference_type
Definition contiguous_set.h:32
bool operator>=(const ConstIterator &other) const
Definition contiguous_set.h:193
RangeIt it
Definition contiguous_set.h:38
ConstIterator operator++(int)
Definition contiguous_set.h:69
size_t value_type
Definition contiguous_set.h:31
const size_t * pointer
Definition contiguous_set.h:33
ConstIterator & operator--()
Definition contiguous_set.h:76
ConstIterator operator--(int)
Definition contiguous_set.h:90
T operator*() const
Definition contiguous_set.h:43
std::random_access_iterator_tag iterator_category
Definition contiguous_set.h:30
bool operator>(const ConstIterator &other) const
Definition contiguous_set.h:188
ConstIterator & operator++()
Definition contiguous_set.h:58
size_t offset
Definition contiguous_set.h:39
bool operator==(const ConstIterator &other) const
Definition contiguous_set.h:48
ConstIterator & operator+=(difference_type n_)
Definition contiguous_set.h:97
typename Ranges::const_iterator RangeIt
Definition contiguous_set.h:36
difference_type operator-(const ConstIterator &other) const
Definition contiguous_set.h:148
bool operator!=(const ConstIterator &other) const
Definition contiguous_set.h:53
constexpr auto parse(ParseContext &ctx)
Definition contiguous_set.h:529
auto format(const ccf::ds::ContiguousSet< T > &v, FormatContext &ctx) const
Definition contiguous_set.h:535