From afbf3c31f4d1a605c264f719531f4183ee5a3022 Mon Sep 17 00:00:00 2001 From: jacqueline Date: Fri, 13 Oct 2023 15:05:49 +1100 Subject: Use libcppbor for much much nicer db encoding --- lib/libcppbor/include/cppbor/cppbor.h | 1141 +++++++++++++++++++++++++++ lib/libcppbor/include/cppbor/cppbor_parse.h | 195 +++++ 2 files changed, 1336 insertions(+) create mode 100644 lib/libcppbor/include/cppbor/cppbor.h create mode 100644 lib/libcppbor/include/cppbor/cppbor_parse.h (limited to 'lib/libcppbor/include/cppbor') diff --git a/lib/libcppbor/include/cppbor/cppbor.h b/lib/libcppbor/include/cppbor/cppbor.h new file mode 100644 index 00000000..f7a2af0c --- /dev/null +++ b/lib/libcppbor/include/cppbor/cppbor.h @@ -0,0 +1,1141 @@ +/* + * Copyright 2019 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef OS_WINDOWS +#include + +#define ssize_t SSIZE_T +#endif // OS_WINDOWS + +#ifdef TRUE +#undef TRUE +#endif // TRUE +#ifdef FALSE +#undef FALSE +#endif // FALSE + +namespace cppbor { + +enum MajorType : uint8_t { + UINT = 0 << 5, + NINT = 1 << 5, + BSTR = 2 << 5, + TSTR = 3 << 5, + ARRAY = 4 << 5, + MAP = 5 << 5, + SEMANTIC = 6 << 5, + SIMPLE = 7 << 5, +}; + +enum SimpleType { + BOOLEAN, + NULL_T, // Only two supported, as yet. +}; + +enum SpecialAddlInfoValues : uint8_t { + FALSE = 20, + TRUE = 21, + NULL_V = 22, + ONE_BYTE_LENGTH = 24, + TWO_BYTE_LENGTH = 25, + FOUR_BYTE_LENGTH = 26, + EIGHT_BYTE_LENGTH = 27, +}; + +class Item; +class Uint; +class Nint; +class Int; +class Tstr; +class Bstr; +class Simple; +class Bool; +class Array; +class Map; +class Null; +class SemanticTag; +class EncodedItem; +class ViewTstr; +class ViewBstr; + +/** + * Returns the size of a CBOR header that contains the additional info value addlInfo. + */ +size_t headerSize(uint64_t addlInfo); + +/** + * Encodes a CBOR header with the specified type and additional info into the range [pos, end). + * Returns a pointer to one past the last byte written, or nullptr if there isn't sufficient space + * to write the header. + */ +uint8_t* encodeHeader(MajorType type, uint64_t addlInfo, uint8_t* pos, const uint8_t* end); + +using EncodeCallback = std::function; + +/** + * Encodes a CBOR header with the specified type and additional info, passing each byte in turn to + * encodeCallback. + */ +void encodeHeader(MajorType type, uint64_t addlInfo, EncodeCallback encodeCallback); + +/** + * Encodes a CBOR header witht he specified type and additional info, writing each byte to the + * provided OutputIterator. + */ +template ::iterator_category>>> +void encodeHeader(MajorType type, uint64_t addlInfo, OutputIterator iter) { + return encodeHeader(type, addlInfo, [&](uint8_t v) { *iter++ = v; }); +} + +/** + * Item represents a CBOR-encodeable data item. Item is an abstract interface with a set of virtual + * methods that allow encoding of the item or conversion to the appropriate derived type. + */ +class Item { + public: + virtual ~Item() {} + + /** + * Returns the CBOR type of the item. + */ + virtual MajorType type() const = 0; + + // These methods safely downcast an Item to the appropriate subclass. + virtual Int* asInt() { return nullptr; } + const Int* asInt() const { return const_cast(this)->asInt(); } + virtual Uint* asUint() { return nullptr; } + const Uint* asUint() const { return const_cast(this)->asUint(); } + virtual Nint* asNint() { return nullptr; } + const Nint* asNint() const { return const_cast(this)->asNint(); } + virtual Tstr* asTstr() { return nullptr; } + const Tstr* asTstr() const { return const_cast(this)->asTstr(); } + virtual Bstr* asBstr() { return nullptr; } + const Bstr* asBstr() const { return const_cast(this)->asBstr(); } + virtual Simple* asSimple() { return nullptr; } + const Simple* asSimple() const { return const_cast(this)->asSimple(); } + virtual Bool* asBool() { return nullptr; } + const Bool* asBool() const { return const_cast(this)->asBool(); } + virtual Null* asNull() { return nullptr; } + const Null* asNull() const { return const_cast(this)->asNull(); } + + virtual Map* asMap() { return nullptr; } + const Map* asMap() const { return const_cast(this)->asMap(); } + virtual Array* asArray() { return nullptr; } + const Array* asArray() const { return const_cast(this)->asArray(); } + + virtual ViewTstr* asViewTstr() { return nullptr; } + const ViewTstr* asViewTstr() const { return const_cast(this)->asViewTstr(); } + virtual ViewBstr* asViewBstr() { return nullptr; } + const ViewBstr* asViewBstr() const { return const_cast(this)->asViewBstr(); } + + // Like those above, these methods safely downcast an Item when it's actually a SemanticTag. + // However, if you think you want to use these methods, you probably don't. Typically, the way + // you should handle tagged Items is by calling the appropriate method above (e.g. asInt()) + // which will return a pointer to the tagged Item, rather than the tag itself. If you want to + // find out if the Item* you're holding is to something with one or more tags applied, see + // semanticTagCount() and semanticTag() below. + virtual SemanticTag* asSemanticTag() { return nullptr; } + const SemanticTag* asSemanticTag() const { return const_cast(this)->asSemanticTag(); } + + /** + * Returns the number of semantic tags prefixed to this Item. + */ + virtual size_t semanticTagCount() const { return 0; } + + /** + * Returns the semantic tag at the specified nesting level `nesting`, iff `nesting` is less than + * the value returned by semanticTagCount(). + * + * CBOR tags are "nested" by applying them in sequence. The "rightmost" tag is the "inner" tag. + * That is, given: + * + * 4(5(6("AES"))) which encodes as C1 C2 C3 63 414553 + * + * The tstr "AES" is tagged with 6. The combined entity ("AES" tagged with 6) is tagged with 5, + * etc. So in this example, semanticTagCount() would return 3, and semanticTag(0) would return + * 5 semanticTag(1) would return 5 and semanticTag(2) would return 4. For values of n > 2, + * semanticTag(n) will return 0, but this is a meaningless value. + * + * If this layering is confusing, you probably don't have to worry about it. Nested tagging does + * not appear to be common, so semanticTag(0) is the only one you'll use. + */ + virtual uint64_t semanticTag(size_t /* nesting */ = 0) const { return 0; } + + /** + * Returns true if this is a "compound" item, i.e. one that contains one or more other items. + */ + virtual bool isCompound() const { return false; } + + bool operator==(const Item& other) const&; + bool operator!=(const Item& other) const& { return !(*this == other); } + + /** + * Returns the number of bytes required to encode this Item into CBOR. Note that if this is a + * complex Item, calling this method will require walking the whole tree. + */ + virtual size_t encodedSize() const = 0; + + /** + * Encodes the Item into buffer referenced by range [*pos, end). Returns a pointer to one past + * the last position written. Returns nullptr if there isn't enough space to encode. + */ + virtual uint8_t* encode(uint8_t* pos, const uint8_t* end) const = 0; + + /** + * Encodes the Item by passing each encoded byte to encodeCallback. + */ + virtual void encode(EncodeCallback encodeCallback) const = 0; + + /** + * Clones the Item + */ + virtual std::unique_ptr clone() const = 0; + + /** + * Encodes the Item into the provided OutputIterator. + */ + template ::iterator_category> + void encode(OutputIterator i) const { + return encode([&](uint8_t v) { *i++ = v; }); + } + + /** + * Encodes the Item into a new std::vector. + */ + std::vector encode() const { + std::vector retval; + retval.reserve(encodedSize()); + encode(std::back_inserter(retval)); + return retval; + } + + /** + * Encodes the Item into a new std::string. + */ + std::string toString() const { + std::string retval; + retval.reserve(encodedSize()); + encode([&](uint8_t v) { retval.push_back(v); }); + return retval; + } + + /** + * Encodes only the header of the Item. + */ + inline uint8_t* encodeHeader(uint64_t addlInfo, uint8_t* pos, const uint8_t* end) const { + return ::cppbor::encodeHeader(type(), addlInfo, pos, end); + } + + /** + * Encodes only the header of the Item. + */ + inline void encodeHeader(uint64_t addlInfo, EncodeCallback encodeCallback) const { + ::cppbor::encodeHeader(type(), addlInfo, encodeCallback); + } +}; + +/** + * EncodedItem represents a bit of already-encoded CBOR. Caveat emptor: It does no checking to + * ensure that the provided data is a valid encoding, cannot be meaninfully-compared with other + * kinds of items and you cannot use the as*() methods to find out what's inside it. + */ +class EncodedItem : public Item { + public: + explicit EncodedItem(std::vector value) : mValue(std::move(value)) {} + + bool operator==(const EncodedItem& other) const& { return mValue == other.mValue; } + + // Type can't be meaningfully-obtained. We could extract the type from the first byte and return + // it, but you can't do any of the normal things with an EncodedItem so there's no point. + MajorType type() const override { + assert(false); + return static_cast(-1); + } + size_t encodedSize() const override { return mValue.size(); } + uint8_t* encode(uint8_t* pos, const uint8_t* end) const override { + if (end - pos < static_cast(mValue.size())) return nullptr; + return std::copy(mValue.begin(), mValue.end(), pos); + } + void encode(EncodeCallback encodeCallback) const override { + std::for_each(mValue.begin(), mValue.end(), encodeCallback); + } + std::unique_ptr clone() const override { return std::make_unique(mValue); } + + private: + std::vector mValue; +}; + +/** + * Int is an abstraction that allows Uint and Nint objects to be manipulated without caring about + * the sign. + */ +class Int : public Item { + public: + bool operator==(const Int& other) const& { return value() == other.value(); } + + virtual int64_t value() const = 0; + using Item::asInt; + Int* asInt() override { return this; } +}; + +/** + * Uint is a concrete Item that implements CBOR major type 0. + */ +class Uint : public Int { + public: + static constexpr MajorType kMajorType = UINT; + + explicit Uint(uint64_t v) : mValue(v) {} + + bool operator==(const Uint& other) const& { return mValue == other.mValue; } + + MajorType type() const override { return kMajorType; } + using Item::asUint; + Uint* asUint() override { return this; } + + size_t encodedSize() const override { return headerSize(mValue); } + + int64_t value() const override { return mValue; } + uint64_t unsignedValue() const { return mValue; } + + using Item::encode; + uint8_t* encode(uint8_t* pos, const uint8_t* end) const override { + return encodeHeader(mValue, pos, end); + } + void encode(EncodeCallback encodeCallback) const override { + encodeHeader(mValue, encodeCallback); + } + + std::unique_ptr clone() const override { return std::make_unique(mValue); } + + private: + uint64_t mValue; +}; + +/** + * Nint is a concrete Item that implements CBOR major type 1. + + * Note that it is incapable of expressing the full range of major type 1 values, becaue it can only + * express values that fall into the range [std::numeric_limits::min(), -1]. It cannot + * express values in the range [std::numeric_limits::min() - 1, + * -std::numeric_limits::max()]. + */ +class Nint : public Int { + public: + static constexpr MajorType kMajorType = NINT; + + explicit Nint(int64_t v); + + bool operator==(const Nint& other) const& { return mValue == other.mValue; } + + MajorType type() const override { return kMajorType; } + using Item::asNint; + Nint* asNint() override { return this; } + size_t encodedSize() const override { return headerSize(addlInfo()); } + + int64_t value() const override { return mValue; } + + using Item::encode; + uint8_t* encode(uint8_t* pos, const uint8_t* end) const override { + return encodeHeader(addlInfo(), pos, end); + } + void encode(EncodeCallback encodeCallback) const override { + encodeHeader(addlInfo(), encodeCallback); + } + + std::unique_ptr clone() const override { return std::make_unique(mValue); } + + private: + uint64_t addlInfo() const { return -1ll - mValue; } + + int64_t mValue; +}; + +/** + * Bstr is a concrete Item that implements major type 2. + */ +class Bstr : public Item { + public: + static constexpr MajorType kMajorType = BSTR; + + // Construct an empty Bstr + explicit Bstr() {} + + // Construct from a vector + explicit Bstr(std::vector v) : mValue(std::move(v)) {} + + // Construct from a string + explicit Bstr(const std::string& v) + : mValue(reinterpret_cast(v.data()), + reinterpret_cast(v.data()) + v.size()) {} + + // Construct from a pointer/size pair + explicit Bstr(const std::pair& buf) + : mValue(buf.first, buf.first + buf.second) {} + + // Construct from a pair of iterators + template ::iterator_category, + typename = typename std::iterator_traits::iterator_category> + explicit Bstr(const std::pair& pair) : mValue(pair.first, pair.second) {} + + // Construct from an iterator range. + template ::iterator_category, + typename = typename std::iterator_traits::iterator_category> + Bstr(I1 begin, I2 end) : mValue(begin, end) {} + + bool operator==(const Bstr& other) const& { return mValue == other.mValue; } + + MajorType type() const override { return kMajorType; } + using Item::asBstr; + Bstr* asBstr() override { return this; } + size_t encodedSize() const override { return headerSize(mValue.size()) + mValue.size(); } + using Item::encode; + uint8_t* encode(uint8_t* pos, const uint8_t* end) const override; + void encode(EncodeCallback encodeCallback) const override { + encodeHeader(mValue.size(), encodeCallback); + encodeValue(encodeCallback); + } + + const std::vector& value() const { return mValue; } + std::vector&& moveValue() { return std::move(mValue); } + + std::unique_ptr clone() const override { return std::make_unique(mValue); } + + private: + void encodeValue(EncodeCallback encodeCallback) const; + + std::vector mValue; +}; + +/** + * ViewBstr is a read-only version of Bstr backed by std::string_view + */ +class ViewBstr : public Item { + public: + static constexpr MajorType kMajorType = BSTR; + + // Construct an empty ViewBstr + explicit ViewBstr() {} + + // Construct from a string_view of uint8_t values + explicit ViewBstr(std::basic_string_view v) : mView(std::move(v)) {} + + // Construct from a string_view + explicit ViewBstr(std::string_view v) + : mView(reinterpret_cast(v.data()), v.size()) {} + + // Construct from an iterator range + template ::iterator_category, + typename = typename std::iterator_traits::iterator_category> + ViewBstr(I1 begin, I2 end) : mView(begin, end) {} + + // Construct from a uint8_t pointer pair + ViewBstr(const uint8_t* begin, const uint8_t* end) + : mView(begin, std::distance(begin, end)) {} + + bool operator==(const ViewBstr& other) const& { return mView == other.mView; } + + MajorType type() const override { return kMajorType; } + using Item::asViewBstr; + ViewBstr* asViewBstr() override { return this; } + size_t encodedSize() const override { return headerSize(mView.size()) + mView.size(); } + using Item::encode; + uint8_t* encode(uint8_t* pos, const uint8_t* end) const override; + void encode(EncodeCallback encodeCallback) const override { + encodeHeader(mView.size(), encodeCallback); + encodeValue(encodeCallback); + } + + const std::basic_string_view& view() const { return mView; } + + std::unique_ptr clone() const override { return std::make_unique(mView); } + + private: + void encodeValue(EncodeCallback encodeCallback) const; + + std::basic_string_view mView; +}; + +/** + * Tstr is a concrete Item that implements major type 3. + */ +class Tstr : public Item { + public: + static constexpr MajorType kMajorType = TSTR; + + // Construct from a string + explicit Tstr(std::string v) : mValue(std::move(v)) {} + + // Construct from a string_view + explicit Tstr(const std::string_view& v) : mValue(v) {} + + // Construct from a C string + explicit Tstr(const char* v) : mValue(std::string(v)) {} + + // Construct from a pair of iterators + template ::iterator_category, + typename = typename std::iterator_traits::iterator_category> + explicit Tstr(const std::pair& pair) : mValue(pair.first, pair.second) {} + + // Construct from an iterator range + template ::iterator_category, + typename = typename std::iterator_traits::iterator_category> + Tstr(I1 begin, I2 end) : mValue(begin, end) {} + + bool operator==(const Tstr& other) const& { return mValue == other.mValue; } + + MajorType type() const override { return kMajorType; } + using Item::asTstr; + Tstr* asTstr() override { return this; } + size_t encodedSize() const override { return headerSize(mValue.size()) + mValue.size(); } + using Item::encode; + uint8_t* encode(uint8_t* pos, const uint8_t* end) const override; + void encode(EncodeCallback encodeCallback) const override { + encodeHeader(mValue.size(), encodeCallback); + encodeValue(encodeCallback); + } + + const std::string& value() const { return mValue; } + std::string&& moveValue() { return std::move(mValue); } + + std::unique_ptr clone() const override { return std::make_unique(mValue); } + + private: + void encodeValue(EncodeCallback encodeCallback) const; + + std::string mValue; +}; + +/** + * ViewTstr is a read-only version of Tstr backed by std::string_view + */ +class ViewTstr : public Item { + public: + static constexpr MajorType kMajorType = TSTR; + + // Construct an empty ViewTstr + explicit ViewTstr() {} + + // Construct from a string_view + explicit ViewTstr(std::string_view v) : mView(std::move(v)) {} + + // Construct from an iterator range + template ::iterator_category, + typename = typename std::iterator_traits::iterator_category> + ViewTstr(I1 begin, I2 end) : mView(begin, end) {} + + // Construct from a uint8_t pointer pair + ViewTstr(const uint8_t* begin, const uint8_t* end) + : mView(reinterpret_cast(begin), + std::distance(begin, end)) {} + + bool operator==(const ViewTstr& other) const& { return mView == other.mView; } + + MajorType type() const override { return kMajorType; } + using Item::asViewTstr; + ViewTstr* asViewTstr() override { return this; } + size_t encodedSize() const override { return headerSize(mView.size()) + mView.size(); } + using Item::encode; + uint8_t* encode(uint8_t* pos, const uint8_t* end) const override; + void encode(EncodeCallback encodeCallback) const override { + encodeHeader(mView.size(), encodeCallback); + encodeValue(encodeCallback); + } + + const std::string_view& view() const { return mView; } + + std::unique_ptr clone() const override { return std::make_unique(mView); } + + private: + void encodeValue(EncodeCallback encodeCallback) const; + + std::string_view mView; +}; + +/* + * Array is a concrete Item that implements CBOR major type 4. + * + * Note that Arrays are not copyable. This is because copying them is expensive and making them + * move-only ensures that they're never copied accidentally. If you actually want to copy an Array, + * use the clone() method. + */ +class Array : public Item { + public: + static constexpr MajorType kMajorType = ARRAY; + + Array() = default; + Array(const Array& other) = delete; + Array(Array&&) = default; + Array& operator=(const Array&) = delete; + Array& operator=(Array&&) = default; + + bool operator==(const Array& other) const&; + + /** + * Construct an Array from a variable number of arguments of different types. See + * details::makeItem below for details on what types may be provided. In general, this accepts + * all of the types you'd expect and doest the things you'd expect (integral values are addes as + * Uint or Nint, std::string and char* are added as Tstr, bools are added as Bool, etc.). + */ + template + Array(Args&&... args); + + /** + * The above variadic constructor is disabled if sizeof(Args) != 1, so special + * case an explicit Array constructor for creating an Array with one Item. + */ + template + explicit Array(T&& v); + + /** + * Append a single element to the Array, of any compatible type. + */ + template + Array& add(T&& v) &; + template + Array&& add(T&& v) &&; + + bool isCompound() const override { return true; } + + virtual size_t size() const { return mEntries.size(); } + + size_t encodedSize() const override { + return std::accumulate(mEntries.begin(), mEntries.end(), headerSize(size()), + [](size_t sum, auto& entry) { return sum + entry->encodedSize(); }); + } + + using Item::encode; // Make base versions visible. + uint8_t* encode(uint8_t* pos, const uint8_t* end) const override; + void encode(EncodeCallback encodeCallback) const override; + + const std::unique_ptr& operator[](size_t index) const { return get(index); } + std::unique_ptr& operator[](size_t index) { return get(index); } + + const std::unique_ptr& get(size_t index) const { return mEntries[index]; } + std::unique_ptr& get(size_t index) { return mEntries[index]; } + + MajorType type() const override { return kMajorType; } + using Item::asArray; + Array* asArray() override { return this; } + + std::unique_ptr clone() const override; + + auto begin() { return mEntries.begin(); } + auto begin() const { return mEntries.begin(); } + auto end() { return mEntries.end(); } + auto end() const { return mEntries.end(); } + + protected: + std::vector> mEntries; +}; + +/* + * Map is a concrete Item that implements CBOR major type 5. + * + * Note that Maps are not copyable. This is because copying them is expensive and making them + * move-only ensures that they're never copied accidentally. If you actually want to copy a + * Map, use the clone() method. + */ +class Map : public Item { + public: + static constexpr MajorType kMajorType = MAP; + + using entry_type = std::pair, std::unique_ptr>; + + Map() = default; + Map(const Map& other) = delete; + Map(Map&&) = default; + Map& operator=(const Map& other) = delete; + Map& operator=(Map&&) = default; + + bool operator==(const Map& other) const&; + + /** + * Construct a Map from a variable number of arguments of different types. An even number of + * arguments must be provided (this is verified statically). See details::makeItem below for + * details on what types may be provided. In general, this accepts all of the types you'd + * expect and doest the things you'd expect (integral values are addes as Uint or Nint, + * std::string and char* are added as Tstr, bools are added as Bool, etc.). + */ + template + Map(Args&&... args); + + /** + * Append a key/value pair to the Map, of any compatible types. + */ + template + Map& add(Key&& key, Value&& value) &; + template + Map&& add(Key&& key, Value&& value) &&; + + bool isCompound() const override { return true; } + + virtual size_t size() const { return mEntries.size(); } + + size_t encodedSize() const override { + return std::accumulate( + mEntries.begin(), mEntries.end(), headerSize(size()), [](size_t sum, auto& entry) { + return sum + entry.first->encodedSize() + entry.second->encodedSize(); + }); + } + + using Item::encode; // Make base versions visible. + uint8_t* encode(uint8_t* pos, const uint8_t* end) const override; + void encode(EncodeCallback encodeCallback) const override; + + /** + * Find and return the value associated with `key`, if any. + * + * If the searched-for `key` is not present, returns `nullptr`. + * + * Note that if the map is canonicalized (sorted), Map::get() performs a binary search. If your + * map is large and you're searching in it many times, it may be worthwhile to canonicalize it + * to make Map::get() faster. Any use of a method that might modify the map disables the + * speedup. + */ + template + const std::unique_ptr& get(Key key) const; + + // Note that use of non-const operator[] marks the map as not canonicalized. + auto& operator[](size_t index) { + mCanonicalized = false; + return mEntries[index]; + } + const auto& operator[](size_t index) const { return mEntries[index]; } + + MajorType type() const override { return kMajorType; } + using Item::asMap; + Map* asMap() override { return this; } + + /** + * Sorts the map in canonical order, as defined in RFC 7049. Use this before encoding if you + * want canonicalization; cppbor does not canonicalize by default, though the integer encodings + * are always canonical and cppbor does not support indefinite-length encodings, so map order + * canonicalization is the only thing that needs to be done. + * + * @param recurse If set to true, canonicalize() will also walk the contents of the map and + * canonicalize any contained maps as well. + */ + Map& canonicalize(bool recurse = false) &; + Map&& canonicalize(bool recurse = false) && { + canonicalize(recurse); + return std::move(*this); + } + + bool isCanonical() { return mCanonicalized; } + + std::unique_ptr clone() const override; + + auto begin() { + mCanonicalized = false; + return mEntries.begin(); + } + auto begin() const { return mEntries.begin(); } + auto end() { + mCanonicalized = false; + return mEntries.end(); + } + auto end() const { return mEntries.end(); } + + // Returns true if a < b, per CBOR map key canonicalization rules. + static bool keyLess(const Item* a, const Item* b); + + protected: + std::vector mEntries; + + private: + bool mCanonicalized = false; +}; + +class SemanticTag : public Item { + public: + static constexpr MajorType kMajorType = SEMANTIC; + + template + SemanticTag(uint64_t tagValue, T&& taggedItem); + SemanticTag(const SemanticTag& other) = delete; + SemanticTag(SemanticTag&&) = default; + SemanticTag& operator=(const SemanticTag& other) = delete; + SemanticTag& operator=(SemanticTag&&) = default; + + bool operator==(const SemanticTag& other) const& { + return mValue == other.mValue && *mTaggedItem == *other.mTaggedItem; + } + + bool isCompound() const override { return true; } + + virtual size_t size() const { return 1; } + + // Encoding returns the tag + enclosed Item. + size_t encodedSize() const override { return headerSize(mValue) + mTaggedItem->encodedSize(); } + + using Item::encode; // Make base versions visible. + uint8_t* encode(uint8_t* pos, const uint8_t* end) const override; + void encode(EncodeCallback encodeCallback) const override; + + // type() is a bit special. In normal usage it should return the wrapped type, but during + // parsing when we haven't yet parsed the tagged item, it needs to return SEMANTIC. + MajorType type() const override { return mTaggedItem ? mTaggedItem->type() : SEMANTIC; } + using Item::asSemanticTag; + SemanticTag* asSemanticTag() override { return this; } + + // Type information reflects the enclosed Item. Note that if the immediately-enclosed Item is + // another tag, these methods will recurse down to the non-tag Item. + using Item::asInt; + Int* asInt() override { return mTaggedItem->asInt(); } + using Item::asUint; + Uint* asUint() override { return mTaggedItem->asUint(); } + using Item::asNint; + Nint* asNint() override { return mTaggedItem->asNint(); } + using Item::asTstr; + Tstr* asTstr() override { return mTaggedItem->asTstr(); } + using Item::asBstr; + Bstr* asBstr() override { return mTaggedItem->asBstr(); } + using Item::asSimple; + Simple* asSimple() override { return mTaggedItem->asSimple(); } + using Item::asMap; + Map* asMap() override { return mTaggedItem->asMap(); } + using Item::asArray; + Array* asArray() override { return mTaggedItem->asArray(); } + using Item::asViewTstr; + ViewTstr* asViewTstr() override { return mTaggedItem->asViewTstr(); } + using Item::asViewBstr; + ViewBstr* asViewBstr() override { return mTaggedItem->asViewBstr(); } + + std::unique_ptr clone() const override; + + size_t semanticTagCount() const override; + uint64_t semanticTag(size_t nesting = 0) const override; + + protected: + SemanticTag() = default; + SemanticTag(uint64_t value) : mValue(value) {} + uint64_t mValue; + std::unique_ptr mTaggedItem; +}; + +/** + * Simple is abstract Item that implements CBOR major type 7. It is intended to be subclassed to + * create concrete Simple types. At present only Bool is provided. + */ +class Simple : public Item { + public: + static constexpr MajorType kMajorType = SIMPLE; + + bool operator==(const Simple& other) const&; + + virtual SimpleType simpleType() const = 0; + MajorType type() const override { return kMajorType; } + + Simple* asSimple() override { return this; } +}; + +/** + * Bool is a concrete type that implements CBOR major type 7, with additional item values for TRUE + * and FALSE. + */ +class Bool : public Simple { + public: + static constexpr SimpleType kSimpleType = BOOLEAN; + + explicit Bool(bool v) : mValue(v) {} + + bool operator==(const Bool& other) const& { return mValue == other.mValue; } + + SimpleType simpleType() const override { return kSimpleType; } + Bool* asBool() override { return this; } + + size_t encodedSize() const override { return 1; } + + using Item::encode; + uint8_t* encode(uint8_t* pos, const uint8_t* end) const override { + return encodeHeader(mValue ? TRUE : FALSE, pos, end); + } + void encode(EncodeCallback encodeCallback) const override { + encodeHeader(mValue ? TRUE : FALSE, encodeCallback); + } + + bool value() const { return mValue; } + + std::unique_ptr clone() const override { return std::make_unique(mValue); } + + private: + bool mValue; +}; + +/** + * Null is a concrete type that implements CBOR major type 7, with additional item value for NULL + */ +class Null : public Simple { + public: + static constexpr SimpleType kSimpleType = NULL_T; + + explicit Null() {} + + SimpleType simpleType() const override { return kSimpleType; } + Null* asNull() override { return this; } + + size_t encodedSize() const override { return 1; } + + using Item::encode; + uint8_t* encode(uint8_t* pos, const uint8_t* end) const override { + return encodeHeader(NULL_V, pos, end); + } + void encode(EncodeCallback encodeCallback) const override { + encodeHeader(NULL_V, encodeCallback); + } + + std::unique_ptr clone() const override { return std::make_unique(); } +}; + +/** + * Returns pretty-printed CBOR for |item| + * + * If a byte-string is larger than |maxBStrSize| its contents will not be printed, instead the value + * of the form "" will be + * printed. Pass zero for |maxBStrSize| to disable this. + * + * The |mapKeysToNotPrint| parameter specifies the name of map values to not print. This is useful + * for unit tests. + */ +std::string prettyPrint(const Item* item, size_t maxBStrSize = 32, + const std::vector& mapKeysToNotPrint = {}); + +/** + * Returns pretty-printed CBOR for |value|. + * + * Only valid CBOR should be passed to this function. + * + * If a byte-string is larger than |maxBStrSize| its contents will not be printed, instead the value + * of the form "" will be + * printed. Pass zero for |maxBStrSize| to disable this. + * + * The |mapKeysToNotPrint| parameter specifies the name of map values to not print. This is useful + * for unit tests. + */ +std::string prettyPrint(const std::vector& encodedCbor, size_t maxBStrSize = 32, + const std::vector& mapKeysToNotPrint = {}); + +/** + * Details. Mostly you shouldn't have to look below, except perhaps at the docstring for makeItem. + */ +namespace details { + +template +struct is_iterator_pair_over : public std::false_type {}; + +template +struct is_iterator_pair_over< + std::pair, V, + typename std::enable_if_t::value_type>>> + : public std::true_type {}; + +template +struct is_unique_ptr_of_subclass_of_v : public std::false_type {}; + +template +struct is_unique_ptr_of_subclass_of_v, + typename std::enable_if_t>> + : public std::true_type {}; + +/* check if type is one of std::string (1), std::string_view (2), null-terminated char* (3) or pair + * of iterators (4)*/ +template +struct is_text_type_v : public std::false_type {}; + +template +struct is_text_type_v< + T, typename std::enable_if_t< + /* case 1 */ // + std::is_same_v>, std::string> + /* case 2 */ // + || std::is_same_v>, std::string_view> + /* case 3 */ // + || std::is_same_v>, char*> // + || std::is_same_v>, const char*> + /* case 4 */ + || details::is_iterator_pair_over::value>> : public std::true_type {}; + +/** + * Construct a unique_ptr from many argument types. Accepts: + * + * (a) booleans; + * (b) integers, all sizes and signs; + * (c) text strings, as defined by is_text_type_v above; + * (d) byte strings, as std::vector(d1), pair of iterators (d2) or pair + * (d3); and + * (e) Item subclass instances, including Array and Map. Items may be provided by naked pointer + * (e1), unique_ptr (e2), reference (e3) or value (e3). If provided by reference or value, will + * be moved if possible. If provided by pointer, ownership is taken. + * (f) null pointer; + * (g) enums, using the underlying integer value. + */ +template +std::unique_ptr makeItem(T v) { + Item* p = nullptr; + if constexpr (/* case a */ std::is_same_v) { + p = new Bool(v); + } else if constexpr (/* case b */ std::is_integral_v) { // b + if (v < 0) { + p = new Nint(v); + } else { + p = new Uint(static_cast(v)); + } + } else if constexpr (/* case c */ // + details::is_text_type_v::value) { + p = new Tstr(v); + } else if constexpr (/* case d1 */ // + std::is_same_v>, + std::vector> + /* case d2 */ // + || details::is_iterator_pair_over::value + /* case d3 */ // + || std::is_same_v>, + std::pair>) { + p = new Bstr(v); + } else if constexpr (/* case e1 */ // + std::is_pointer_v && + std::is_base_of_v>) { + p = v; + } else if constexpr (/* case e2 */ // + details::is_unique_ptr_of_subclass_of_v::value) { + p = v.release(); + } else if constexpr (/* case e3 */ // + std::is_base_of_v) { + p = new T(std::move(v)); + } else if constexpr (/* case f */ std::is_null_pointer_v) { + p = new Null(); + } else if constexpr (/* case g */ std::is_enum_v) { + return makeItem(static_cast>(v)); + } else { + // It's odd that this can't be static_assert(false), since it shouldn't be evaluated if one + // of the above ifs matches. But static_assert(false) always triggers. + static_assert(std::is_same_v, "makeItem called with unsupported type"); + } + return std::unique_ptr(p); +} + +inline void map_helper(Map& /* map */) {} + +template +inline void map_helper(Map& map, Key&& key, Value&& value, Rest&&... rest) { + map.add(std::forward(key), std::forward(value)); + map_helper(map, std::forward(rest)...); +} + +} // namespace details + +template > +Array::Array(Args&&... args) { + mEntries.reserve(sizeof...(args)); + (mEntries.push_back(details::makeItem(std::forward(args))), ...); +} + +template >>>> +Array::Array(T&& v) { + mEntries.push_back(details::makeItem(std::forward(v))); +} + +template +Array& Array::add(T&& v) & { + mEntries.push_back(details::makeItem(std::forward(v))); + return *this; +} + +template +Array&& Array::add(T&& v) && { + mEntries.push_back(details::makeItem(std::forward(v))); + return std::move(*this); +} + +template > +Map::Map(Args&&... args) { + static_assert((sizeof...(Args)) % 2 == 0, "Map must have an even number of entries"); + mEntries.reserve(sizeof...(args) / 2); + details::map_helper(*this, std::forward(args)...); +} + +template +Map& Map::add(Key&& key, Value&& value) & { + mEntries.push_back({details::makeItem(std::forward(key)), + details::makeItem(std::forward(value))}); + mCanonicalized = false; + return *this; +} + +template +Map&& Map::add(Key&& key, Value&& value) && { + this->add(std::forward(key), std::forward(value)); + return std::move(*this); +} + +static const std::unique_ptr kEmptyItemPtr; + +template || std::is_enum_v || + details::is_text_type_v::value>> +const std::unique_ptr& Map::get(Key key) const { + auto keyItem = details::makeItem(key); + + if (mCanonicalized) { + // It's sorted, so binary-search it. + auto found = std::lower_bound(begin(), end(), keyItem.get(), + [](const entry_type& entry, const Item* key) { + return keyLess(entry.first.get(), key); + }); + return (found == end() || *found->first != *keyItem) ? kEmptyItemPtr : found->second; + } else { + // Unsorted, do a linear search. + auto found = std::find_if( + begin(), end(), [&](const entry_type& entry) { return *entry.first == *keyItem; }); + return found == end() ? kEmptyItemPtr : found->second; + } +} + +template +SemanticTag::SemanticTag(uint64_t value, T&& taggedItem) + : mValue(value), mTaggedItem(details::makeItem(std::forward(taggedItem))) {} + +} // namespace cppbor diff --git a/lib/libcppbor/include/cppbor/cppbor_parse.h b/lib/libcppbor/include/cppbor/cppbor_parse.h new file mode 100644 index 00000000..22cd18d0 --- /dev/null +++ b/lib/libcppbor/include/cppbor/cppbor_parse.h @@ -0,0 +1,195 @@ +/* + * Copyright 2019 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "cppbor.h" + +namespace cppbor { + +using ParseResult = std::tuple /* result */, const uint8_t* /* newPos */, + std::string /* errMsg */>; + +/** + * Parse the first CBOR data item (possibly compound) from the range [begin, end). + * + * Returns a tuple of Item pointer, buffer pointer and error message. If parsing is successful, the + * Item pointer is non-null, the buffer pointer points to the first byte after the + * successfully-parsed item and the error message string is empty. If parsing fails, the Item + * pointer is null, the buffer pointer points to the first byte that was unparseable (the first byte + * of a data item header that is malformed in some way, e.g. an invalid value, or a length that is + * too large for the remaining buffer, etc.) and the string contains an error message describing the + * problem encountered. + */ +ParseResult parse(const uint8_t* begin, const uint8_t* end); + +/** + * Parse the first CBOR data item (possibly compound) from the range [begin, end). + * + * Returns a tuple of Item pointer, buffer pointer and error message. If parsing is successful, the + * Item pointer is non-null, the buffer pointer points to the first byte after the + * successfully-parsed item and the error message string is empty. If parsing fails, the Item + * pointer is null, the buffer pointer points to the first byte that was unparseable (the first byte + * of a data item header that is malformed in some way, e.g. an invalid value, or a length that is + * too large for the remaining buffer, etc.) and the string contains an error message describing the + * problem encountered. + * + * The returned CBOR data item will contain View* items backed by + * std::string_view types over the input range. + * WARNING! If the input range changes underneath, the corresponding views will + * carry the same change. + */ +ParseResult parseWithViews(const uint8_t* begin, const uint8_t* end); + +/** + * Parse the first CBOR data item (possibly compound) from the byte vector. + * + * Returns a tuple of Item pointer, buffer pointer and error message. If parsing is successful, the + * Item pointer is non-null, the buffer pointer points to the first byte after the + * successfully-parsed item and the error message string is empty. If parsing fails, the Item + * pointer is null, the buffer pointer points to the first byte that was unparseable (the first byte + * of a data item header that is malformed in some way, e.g. an invalid value, or a length that is + * too large for the remaining buffer, etc.) and the string contains an error message describing the + * problem encountered. + */ +inline ParseResult parse(const std::vector& encoding) { + return parse(encoding.data(), encoding.data() + encoding.size()); +} + +/** + * Parse the first CBOR data item (possibly compound) from the range [begin, begin + size). + * + * Returns a tuple of Item pointer, buffer pointer and error message. If parsing is successful, the + * Item pointer is non-null, the buffer pointer points to the first byte after the + * successfully-parsed item and the error message string is empty. If parsing fails, the Item + * pointer is null, the buffer pointer points to the first byte that was unparseable (the first byte + * of a data item header that is malformed in some way, e.g. an invalid value, or a length that is + * too large for the remaining buffer, etc.) and the string contains an error message describing the + * problem encountered. + */ +inline ParseResult parse(const uint8_t* begin, size_t size) { + return parse(begin, begin + size); +} + +/** + * Parse the first CBOR data item (possibly compound) from the range [begin, begin + size). + * + * Returns a tuple of Item pointer, buffer pointer and error message. If parsing is successful, the + * Item pointer is non-null, the buffer pointer points to the first byte after the + * successfully-parsed item and the error message string is empty. If parsing fails, the Item + * pointer is null, the buffer pointer points to the first byte that was unparseable (the first byte + * of a data item header that is malformed in some way, e.g. an invalid value, or a length that is + * too large for the remaining buffer, etc.) and the string contains an error message describing the + * problem encountered. + * + * The returned CBOR data item will contain View* items backed by + * std::string_view types over the input range. + * WARNING! If the input range changes underneath, the corresponding views will + * carry the same change. + */ +inline ParseResult parseWithViews(const uint8_t* begin, size_t size) { + return parseWithViews(begin, begin + size); +} + +/** + * Parse the first CBOR data item (possibly compound) from the value contained in a Bstr. + * + * Returns a tuple of Item pointer, buffer pointer and error message. If parsing is successful, the + * Item pointer is non-null, the buffer pointer points to the first byte after the + * successfully-parsed item and the error message string is empty. If parsing fails, the Item + * pointer is null, the buffer pointer points to the first byte that was unparseable (the first byte + * of a data item header that is malformed in some way, e.g. an invalid value, or a length that is + * too large for the remaining buffer, etc.) and the string contains an error message describing the + * problem encountered. + */ +inline ParseResult parse(const Bstr* bstr) { + if (!bstr) + return ParseResult(nullptr, nullptr, "Null Bstr pointer"); + return parse(bstr->value()); +} + +class ParseClient; + +/** + * Parse the CBOR data in the range [begin, end) in streaming fashion, calling methods on the + * provided ParseClient when elements are found. + */ +void parse(const uint8_t* begin, const uint8_t* end, ParseClient* parseClient); + +/** + * Parse the CBOR data in the range [begin, end) in streaming fashion, calling methods on the + * provided ParseClient when elements are found. Uses the View* item types + * instead of the copying ones. + */ +void parseWithViews(const uint8_t* begin, const uint8_t* end, ParseClient* parseClient); + +/** + * Parse the CBOR data in the vector in streaming fashion, calling methods on the + * provided ParseClient when elements are found. + */ +inline void parse(const std::vector& encoding, ParseClient* parseClient) { + return parse(encoding.data(), encoding.data() + encoding.size(), parseClient); +} + +/** + * A pure interface that callers of the streaming parse functions must implement. + */ +class ParseClient { + public: + virtual ~ParseClient() {} + + /** + * Called when an item is found. The Item pointer points to the found item; use type() and + * the appropriate as*() method to examine the value. hdrBegin points to the first byte of the + * header, valueBegin points to the first byte of the value and end points one past the end of + * the item. In the case of header-only items, such as integers, and compound items (ARRAY, + * MAP or SEMANTIC) whose end has not yet been found, valueBegin and end are equal and point to + * the byte past the header. + * + * Note that for compound types (ARRAY, MAP, and SEMANTIC), the Item will have no content. For + * Map and Array items, the size() method will return a correct value, but the index operators + * are unsafe, and the object cannot be safely compared with another Array/Map. + * + * The method returns a ParseClient*. In most cases "return this;" will be the right answer, + * but a different ParseClient may be returned, which the parser will begin using. If the method + * returns nullptr, parsing will be aborted immediately. + */ + virtual ParseClient* item(std::unique_ptr& item, const uint8_t* hdrBegin, + const uint8_t* valueBegin, const uint8_t* end) = 0; + + /** + * Called when the end of a compound item (MAP or ARRAY) is found. The item argument will be + * the same one passed to the item() call -- and may be empty if item() moved its value out. + * hdrBegin, valueBegin and end point to the beginning of the item header, the beginning of the + * first contained value, and one past the end of the last contained value, respectively. + * + * Note that the Item will have no content. + * + * As with item(), itemEnd() can change the ParseClient by returning a different one, or end the + * parsing by returning nullptr; + */ + virtual ParseClient* itemEnd(std::unique_ptr& item, const uint8_t* hdrBegin, + const uint8_t* valueBegin, const uint8_t* end) = 0; + + /** + * Called when parsing encounters an error. position is set to the first unparsed byte (one + * past the last successfully-parsed byte) and errorMessage contains an message explaining what + * sort of error occurred. + */ + virtual void error(const uint8_t* position, const std::string& errorMessage) = 0; +}; + +} // namespace cppbor -- cgit v1.2.3