From 3ae52c8cfdb1f8d6e887e9c8c980bbbec7fcb246 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juan=20Manuel=20Tom=C3=A1s?= Date: Tue, 26 May 2020 03:25:05 -0300 Subject: Add partial support for utf-8 --- point.cpp | 112 ++++++++++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 83 insertions(+), 29 deletions(-) (limited to 'point.cpp') diff --git a/point.cpp b/point.cpp index e31f5de..e03b43e 100644 --- a/point.cpp +++ b/point.cpp @@ -1,14 +1,11 @@ #include +#include #include -struct Point { +class Point { Page *page; uint16_t index; - Point() : page(nullptr), index(0) {} - Point(Page *page) : page(page), index(0) {} - Point(const Point& p) : page(p.page), index(p.index) {} - uint16_t index_to_offset() { if (index < page->gap_start) { return index; @@ -17,28 +14,20 @@ struct Point { } } - bool operator==(Point p) { - return page == p.page && index == p.index; + uint8_t byte() { + return index == page->element_count ? (page->next ? page->next->elements[0] : 0) : page->elements[index_to_offset()]; } - uint8_t element() { - if (index == page->element_count) { - return !page->next ? 0 : page->next->elements[0]; - } else { - return page->elements[index_to_offset()]; - } - } - - void operator++(int) { + void next_byte() { if (index < page->element_count) { index++; } else if (page->next) { - index = 1; page = page->next; + index = 1; } } - void operator--(int) { + void prev_byte() { if (index > 1) { index--; } else if (page->prev) { @@ -49,10 +38,75 @@ struct Point { } } + size_t rune_type() { + if (byte() & 1 << 7) { + if (byte() & 1 << 6) { + if (byte() & 1 << 5) { + if (byte() & 1 << 4) { + return 4; + } else { + return 3; + } + } else { + return 2; + } + } else { + return 0; + } + } else { + return 1; + } + } + + public: + + Point() : page(new Page()), index(0) {} + Point(const Point& p) : page(p.page), index(p.index) {} + + bool operator==(Point p) { + return page == p.page && index == p.index; + } + + bool operator!=(Point p) { + return page != p.page || index != p.index; + } + + bool at_start() { + return index == 0; + } + + bool at_end() { + return index == page->element_count && !page->next; + } + + void operator++(int) { + do { + next_byte(); + } while (!rune_type()); + } + + void operator--(int) { + do { + prev_byte(); + } while (!rune_type()); + } + + wchar_t element() { + size_t type = rune_type(); + wchar_t rune = byte() & (0xff >> type); + Point iter(*this); + for (size_t i = 1; i < type; i++) { + rune <<= 6; + iter.next_byte(); + rune |= (iter.byte() & 0x3f); + } + return rune; + } + uint64_t seek(uint8_t c, uint64_t limit) { uint64_t travel_distance = 0; - while (element() && element() != c && travel_distance < limit) { - (*this)++; + while (!at_end() && byte() != c && travel_distance < limit) { + next_byte(); travel_distance++; } return travel_distance; @@ -60,8 +114,8 @@ struct Point { uint64_t rseek(uint8_t c, uint64_t limit) { uint64_t travel_distance = 0; - while (index != 0 && element() != c && travel_distance < limit) { - (*this)--; + while (!at_start() && byte() != c && travel_distance < limit) { + prev_byte(); travel_distance++; } return travel_distance; @@ -69,15 +123,15 @@ struct Point { void align_gap() { while (page->gap_end < index_to_offset()) { - (*page)++; + page->move_gap_forward(); } while (page->gap_end > index_to_offset()) { - (*page)--; + page->move_gap_backward(); } } void push(uint8_t c) { - if (page->gap_start == page->gap_end) { + if (page->is_full()) { page->split(); if (index >= PAGE_SIZE / 2) { page = page->next; @@ -86,13 +140,13 @@ struct Point { } align_gap(); page->push(c); - (*this)++; + next_byte(); } void pop() { if (page->element_count == 1 && index == 1) { if (page->prev) { - (*this)--; + prev_byte(); delete page->next; } else if (page->next) { page->next->copy_to(page); @@ -100,12 +154,12 @@ struct Point { index = 0; } else { page->pop(); - index = 0; + prev_byte(); } } else if (index > 0) { align_gap(); page->pop(); - (*this)--; + prev_byte(); } } -- cgit v1.2.3