diff options
Diffstat (limited to 'src/database/records.cpp')
| -rw-r--r-- | src/database/records.cpp | 260 |
1 files changed, 0 insertions, 260 deletions
diff --git a/src/database/records.cpp b/src/database/records.cpp deleted file mode 100644 index b086be3b..00000000 --- a/src/database/records.cpp +++ /dev/null @@ -1,260 +0,0 @@ -/* - * Copyright 2023 jacqueline <me@jacqueline.id.au> - * - * SPDX-License-Identifier: GPL-3.0-only - */ - -#include "records.hpp" - -#include <stdint.h> -#include <sys/_stdint.h> - -#include <functional> -#include <iomanip> -#include <iostream> -#include <memory_resource> -#include <sstream> -#include <string> -#include <vector> - -#include "cppbor.h" -#include "cppbor_parse.h" -#include "esp_log.h" - -#include "index.hpp" -#include "komihash.h" -#include "memory_resource.hpp" -#include "track.hpp" - -// As LevelDB is a key-value store, each record in the database consists of a -// key and an optional value. -// -// Values, when present, are always cbor-encoded. This is fast, compact, and -// very easy to evolve over time due to its inclusion of type information. -// -// Keys have a more complicated scheme, as for performance we rely heavily on -// LevelDB's sorted storage format. We must therefore worry about clustering of -// similar records, and the sortability of our encoding format. -// Each kind of key consists of a single-byte prefix, then one or more -// fields separated by null (0) bytes. Each field may be cbor-encoded, or may -// use some bespoke encoding; it depends on whether we want to be able to sort -// by that field. -// For debugging and discussion purposes, we represent field separators -// textually as '/', and write each field as its hex encoding. e.g. a data key -// for the track with id 17 would be written as 'D / 0x11'. 
- -namespace database { - -[[maybe_unused]] static const char* kTag = "RECORDS"; - -static const char kPathPrefix = 'P'; -static const char kDataPrefix = 'D'; -static const char kHashPrefix = 'H'; -static const char kTagHashPrefix = 'T'; -static const char kIndexPrefix = 'I'; -static const char kFieldSeparator = '\0'; - -static constexpr auto makePrefix(char p) -> std::string { - std::string str; - str += p; - str += kFieldSeparator; - return str; -} - -auto EncodePathKey(std::string_view path) -> std::string { - std::stringstream out{}; - out << makePrefix(kPathPrefix); - out << path; - return out.str(); -} - -/* 'D/' */ -auto EncodeDataPrefix() -> std::string { - return makePrefix(kDataPrefix); -} - -/* 'D/ 0xACAB' */ -auto EncodeDataKey(const TrackId& id) -> std::string { - return EncodeDataPrefix() + TrackIdToBytes(id); -} - -auto EncodeDataValue(const TrackData& track) -> std::string { - auto* tag_hashes = new cppbor::Map{}; // Free'd by Array's dtor. - for (const auto& entry : track.individual_tag_hashes) { - tag_hashes->add(cppbor::Uint{static_cast<uint32_t>(entry.first)}, - cppbor::Uint{entry.second}); - } - cppbor::Array val{ - cppbor::Uint{track.id}, - cppbor::Tstr{track.filepath}, - cppbor::Uint{track.tags_hash}, - cppbor::Bool{track.is_tombstoned}, - cppbor::Uint{track.modified_at.first}, - cppbor::Uint{track.modified_at.second}, - tag_hashes, - }; - return val.toString(); -} - -auto ParseDataValue(const leveldb::Slice& slice) -> std::shared_ptr<TrackData> { - auto [item, unused, err] = cppbor::parseWithViews( - reinterpret_cast<const uint8_t*>(slice.data()), slice.size()); - if (!item || item->type() != cppbor::ARRAY) { - return nullptr; - } - auto vals = item->asArray(); - if (vals->size() != 7 || vals->get(0)->type() != cppbor::UINT || - vals->get(1)->type() != cppbor::TSTR || - vals->get(2)->type() != cppbor::UINT || - vals->get(3)->type() != cppbor::SIMPLE || - vals->get(4)->type() != cppbor::UINT || - vals->get(5)->type() != cppbor::UINT || - 
vals->get(6)->type() != cppbor::MAP) { - return {}; - } - auto res = std::make_shared<TrackData>(); - res->id = vals->get(0)->asUint()->unsignedValue(); - res->filepath = vals->get(1)->asViewTstr()->view(); - res->tags_hash = vals->get(2)->asUint()->unsignedValue(); - res->is_tombstoned = vals->get(3)->asBool()->value(); - res->modified_at = std::make_pair<uint16_t, uint16_t>( - vals->get(4)->asUint()->unsignedValue(), - vals->get(5)->asUint()->unsignedValue()); - - auto tag_hashes = vals->get(6)->asMap(); - for (const auto& entry : *tag_hashes) { - auto tag = static_cast<Tag>(entry.first->asUint()->unsignedValue()); - res->individual_tag_hashes[tag] = entry.second->asUint()->unsignedValue(); - } - return res; -} - -/* 'H/ 0xBEEF' */ -auto EncodeHashKey(const uint64_t& hash) -> std::string { - return makePrefix(kHashPrefix) + cppbor::Uint{hash}.toString(); -} - -auto ParseHashValue(const leveldb::Slice& slice) -> std::optional<TrackId> { - return BytesToTrackId({slice.data(), slice.size()}); -} - -auto EncodeHashValue(TrackId id) -> std::string { - return TrackIdToBytes(id); -} - -/* 'T/ 0xBEEF' */ -auto EncodeTagHashKey(const uint64_t& hash) -> std::string { - return makePrefix(kTagHashPrefix) + cppbor::Uint{hash}.toString(); -} - -/* 'I/' */ -auto EncodeAllIndexesPrefix() -> std::string { - return makePrefix(kIndexPrefix); -} - -auto EncodeIndexPrefix(const IndexKey::Header& header) -> std::string { - std::ostringstream out; - out << makePrefix(kIndexPrefix); - cppbor::Array val{ - cppbor::Uint{header.id}, - cppbor::Uint{header.depth}, - cppbor::Uint{header.components_hash}, - }; - out << val.toString() << kFieldSeparator; - return out.str(); -} - -/* - * 'I/0xa2/0x686921/0xb9' - * ^ --- trailer - * ^ --- component ("hi!") - * ^ -------- header - * - * The components *must* be encoded in a way that is easy to sort - * lexicographically. The header and footer do not have this restriction, so - * cbor is fine. 
- * - * We store grouping information within the header; which index, filtered - * components. We store disambiguation information in the trailer; just a track - * id for now, but could reasonably be something like 'release year' as well. - */ -auto EncodeIndexKey(const IndexKey& key) -> std::string { - std::ostringstream out{}; - - out << EncodeIndexPrefix(key.header); - - // The component should already be UTF-8 encoded, so just write it. - if (key.item) { - out << *key.item << kFieldSeparator; - } - - if (key.track) { - out << TrackIdToBytes(*key.track); - } - - return out.str(); -} - -auto ParseIndexKey(const leveldb::Slice& slice) -> std::optional<IndexKey> { - IndexKey result{}; - - auto prefix = EncodeAllIndexesPrefix(); - if (!slice.starts_with(prefix)) { - return {}; - } - - std::string key_data = slice.ToString().substr(prefix.size()); - auto [key, end_of_key, err] = cppbor::parseWithViews( - reinterpret_cast<const uint8_t*>(key_data.data()), key_data.size()); - if (!key || key->type() != cppbor::ARRAY) { - return {}; - } - auto as_array = key->asArray(); - if (as_array->size() != 3 || as_array->get(0)->type() != cppbor::UINT || - as_array->get(1)->type() != cppbor::UINT || - as_array->get(2)->type() != cppbor::UINT) { - return {}; - } - result.header.id = as_array->get(0)->asUint()->unsignedValue(); - result.header.depth = as_array->get(1)->asUint()->unsignedValue(); - result.header.components_hash = as_array->get(2)->asUint()->unsignedValue(); - - size_t header_length = - reinterpret_cast<const char*>(end_of_key) - key_data.data(); - - if (header_length == 0 || header_length >= key_data.size()) { - return {}; - } - - std::istringstream in(key_data.substr(header_length + 1)); - std::stringbuf buffer{}; - - in.get(buffer, kFieldSeparator); - if (buffer.str().size() > 0) { - result.item = buffer.str(); - } - - buffer = {}; - in.get(buffer); - std::string id_str = buffer.str(); - if (id_str.size() > 1) { - result.track = BytesToTrackId(id_str.substr(1)); - 
} - - return result; -} - -auto TrackIdToBytes(TrackId id) -> std::string { - return cppbor::Uint{id}.toString(); -} - -auto BytesToTrackId(std::span<const char> bytes) -> std::optional<TrackId> { - auto [res, unused, err] = cppbor::parse( - reinterpret_cast<const uint8_t*>(bytes.data()), bytes.size()); - if (!res || res->type() != cppbor::UINT) { - return {}; - } - return res->asUint()->unsignedValue(); -} - -} // namespace database |
