diff options
| author | jacqueline <me@jacqueline.id.au> | 2024-05-02 19:12:26 +1000 |
|---|---|---|
| committer | jacqueline <me@jacqueline.id.au> | 2024-05-02 19:12:26 +1000 |
| commit | 1573a8c4cde1cd9528b422b2dcc598e37ffe94a7 (patch) | |
| tree | d162822b8fd7054f81bace0c7a65ab4d5e6f93ef /src/tangara/database | |
| parent | a231fd1c8afedbeb14b0bc77d76bad61db986059 (diff) | |
| download | tangara-fw-1573a8c4cde1cd9528b422b2dcc598e37ffe94a7.tar.gz | |
WIP merge cyclically dependent components into one big component
Diffstat (limited to 'src/tangara/database')
| -rw-r--r-- | src/tangara/database/database.cpp | 820 | ||||
| -rw-r--r-- | src/tangara/database/database.hpp | 244 | ||||
| -rw-r--r-- | src/tangara/database/db_events.hpp | 29 | ||||
| -rw-r--r-- | src/tangara/database/env_esp.cpp | 497 | ||||
| -rw-r--r-- | src/tangara/database/env_esp.hpp | 143 | ||||
| -rw-r--r-- | src/tangara/database/file_gatherer.cpp | 80 | ||||
| -rw-r--r-- | src/tangara/database/file_gatherer.hpp | 36 | ||||
| -rw-r--r-- | src/tangara/database/future_fetcher.hpp | 62 | ||||
| -rw-r--r-- | src/tangara/database/index.cpp | 206 | ||||
| -rw-r--r-- | src/tangara/database/index.hpp | 78 | ||||
| -rw-r--r-- | src/tangara/database/records.cpp | 260 | ||||
| -rw-r--r-- | src/tangara/database/records.hpp | 85 | ||||
| -rw-r--r-- | src/tangara/database/tag_parser.cpp | 208 | ||||
| -rw-r--r-- | src/tangara/database/tag_parser.hpp | 44 | ||||
| -rw-r--r-- | src/tangara/database/test/CMakeLists.txt | 8 | ||||
| -rw-r--r-- | src/tangara/database/test/test_database.cpp | 210 | ||||
| -rw-r--r-- | src/tangara/database/test/test_records.cpp | 146 | ||||
| -rw-r--r-- | src/tangara/database/track.cpp | 307 | ||||
| -rw-r--r-- | src/tangara/database/track.hpp | 205 |
19 files changed, 3668 insertions, 0 deletions
diff --git a/src/tangara/database/database.cpp b/src/tangara/database/database.cpp new file mode 100644 index 00000000..48fb0c63 --- /dev/null +++ b/src/tangara/database/database.cpp @@ -0,0 +1,820 @@ +/* + * Copyright 2023 jacqueline <me@jacqueline.id.au> + * + * SPDX-License-Identifier: GPL-3.0-only + */ + +#include "database.hpp" + +#include <stdint.h> +#include <sys/_stdint.h> + +#include <algorithm> +#include <cstdint> +#include <functional> +#include <iomanip> +#include <iostream> +#include <memory> +#include <optional> +#include <sstream> +#include <string> +#include <variant> + +#include "collation.hpp" +#include "cppbor.h" +#include "cppbor_parse.h" +#include "esp_log.h" +#include "ff.h" +#include "freertos/projdefs.h" +#include "index.hpp" +#include "komihash.h" +#include "leveldb/cache.h" +#include "leveldb/db.h" +#include "leveldb/iterator.h" +#include "leveldb/options.h" +#include "leveldb/slice.h" +#include "leveldb/status.h" +#include "leveldb/write_batch.h" + +#include "db_events.hpp" +#include "env_esp.hpp" +#include "event_queue.hpp" +#include "file_gatherer.hpp" +#include "memory_resource.hpp" +#include "records.hpp" +#include "result.hpp" +#include "spi.hpp" +#include "tag_parser.hpp" +#include "tasks.hpp" +#include "track.hpp" + +namespace database { + +static SingletonEnv<leveldb::EspEnv> sEnv; +[[maybe_unused]] static const char* kTag = "DB"; + +static const char kDbPath[] = "/.tangara-db"; + +static const char kKeyDbVersion[] = "schema_version"; + +static const char kKeyCustom[] = "U\0"; +static const char kKeyCollator[] = "collator"; +static const char kKeyTrackId[] = "next_track_id"; + +static std::atomic<bool> sIsDbOpen(false); + +static auto CreateNewDatabase(leveldb::Options& options, locale::ICollator& col) + -> leveldb::DB* { + Database::Destroy(); + leveldb::DB* db; + options.create_if_missing = true; + auto status = leveldb::DB::Open(options, kDbPath, &db); + if (!status.ok()) { + ESP_LOGE(kTag, "failed to open db, status %s", 
status.ToString().c_str()); + return nullptr; + } + auto version_str = std::to_string(kCurrentDbVersion); + status = db->Put(leveldb::WriteOptions{}, kKeyDbVersion, version_str); + if (!status.ok()) { + delete db; + return nullptr; + } + ESP_LOGI(kTag, "opening db with collator %s", + col.Describe().value_or("NULL").c_str()); + status = db->Put(leveldb::WriteOptions{}, kKeyCollator, + col.Describe().value_or("")); + if (!status.ok()) { + delete db; + return nullptr; + } + return db; +} + +static auto CheckDatabase(leveldb::DB& db, locale::ICollator& col) -> bool { + leveldb::Status status; + + std::string raw_version; + std::optional<uint8_t> version{}; + status = db.Get(leveldb::ReadOptions{}, kKeyDbVersion, &raw_version); + if (status.ok()) { + version = std::stoi(raw_version); + } + if (!version || *version != kCurrentDbVersion) { + ESP_LOGW(kTag, "db version missing or incorrect"); + return false; + } + + std::string collator; + status = db.Get(leveldb::ReadOptions{}, kKeyCollator, &collator); + if (!status.ok()) { + ESP_LOGW(kTag, "db collator is unknown"); + return false; + } + auto needed = col.Describe(); + + if ((needed && needed.value() != collator) || + (!needed && !collator.empty())) { + ESP_LOGW(kTag, "db collator is mismatched"); + return false; + } + + return true; +} + +auto Database::Open(IFileGatherer& gatherer, + ITagParser& parser, + locale::ICollator& collator, + tasks::WorkerPool& bg_worker) + -> cpp::result<Database*, DatabaseError> { + if (sIsDbOpen.exchange(true)) { + return cpp::fail(DatabaseError::ALREADY_OPEN); + } + + if (!leveldb::sBackgroundThread) { + leveldb::sBackgroundThread = &bg_worker; + } + + return bg_worker + .Dispatch<cpp::result<Database*, DatabaseError>>( + [&]() -> cpp::result<Database*, DatabaseError> { + leveldb::DB* db; + std::unique_ptr<leveldb::Cache> cache{ + leveldb::NewLRUCache(256 * 1024)}; + + leveldb::Options options; + options.env = sEnv.env(); + options.write_buffer_size = 4 * 1024; + options.max_file_size = 
16 * 1024; + options.block_cache = cache.get(); + options.block_size = 2048; + + auto status = leveldb::DB::Open(options, kDbPath, &db); + if (!status.ok()) { + ESP_LOGI(kTag, "opening db failed. recreating."); + db = CreateNewDatabase(options, collator); + if (db == nullptr) { + return cpp::fail(FAILED_TO_OPEN); + } + } + + if (!CheckDatabase(*db, collator)) { + ESP_LOGI(kTag, "db incompatible. recreating."); + delete db; + db = CreateNewDatabase(options, collator); + if (db == nullptr) { + return cpp::fail(FAILED_TO_OPEN); + } + } + + ESP_LOGI(kTag, "Database opened successfully"); + return new Database(db, cache.release(), gatherer, parser, + collator); + }) + .get(); +} + +auto Database::Destroy() -> void { + leveldb::Options options; + options.env = sEnv.env(); + leveldb::DestroyDB(kDbPath, options); +} + +Database::Database(leveldb::DB* db, + leveldb::Cache* cache, + IFileGatherer& file_gatherer, + ITagParser& tag_parser, + locale::ICollator& collator) + : db_(db), + cache_(cache), + file_gatherer_(file_gatherer), + tag_parser_(tag_parser), + collator_(collator), + is_updating_(false) {} + +Database::~Database() { + // Delete db_ first so that any outstanding background work finishes before + // the background task is killed. + delete db_; + delete cache_; + + sIsDbOpen.store(false); +} + +auto Database::schemaVersion() -> std::string { + // If the database is open, then it must have the current schema. 
+ return std::to_string(kCurrentDbVersion); +} + +auto Database::sizeOnDiskBytes() -> size_t { + auto lock = drivers::acquire_spi(); + + FF_DIR dir; + FRESULT res = f_opendir(&dir, kDbPath); + if (res != FR_OK) { + return 0; + } + + size_t total_size = 0; + for (;;) { + FILINFO info; + res = f_readdir(&dir, &info); + if (res != FR_OK || info.fname[0] == 0) { + break; + } + total_size += info.fsize; + } + + return total_size; +} + +auto Database::put(const std::string& key, const std::string& val) -> void { + if (val.empty()) { + db_->Delete(leveldb::WriteOptions{}, kKeyCustom + key); + } else { + db_->Put(leveldb::WriteOptions{}, kKeyCustom + key, val); + } +} + +auto Database::get(const std::string& key) -> std::optional<std::string> { + std::string val; + auto res = db_->Get(leveldb::ReadOptions{}, kKeyCustom + key, &val); + if (!res.ok() || val.empty()) { + return {}; + } + return val; +} + +auto Database::getTrackPath(TrackId id) -> std::optional<std::string> { + auto track_data = dbGetTrackData(id); + if (!track_data) { + return {}; + } + return std::string{track_data->filepath.data(), track_data->filepath.size()}; +} + +auto Database::getTrack(TrackId id) -> std::shared_ptr<Track> { + std::shared_ptr<TrackData> data = dbGetTrackData(id); + if (!data || data->is_tombstoned) { + return {}; + } + std::shared_ptr<TrackTags> tags = tag_parser_.ReadAndParseTags( + {data->filepath.data(), data->filepath.size()}); + if (!tags) { + return {}; + } + return std::make_shared<Track>(data, tags); +} + +auto Database::getIndexes() -> std::vector<IndexInfo> { + // TODO(jacqueline): This probably needs to be async? When we have runtime + // configurable indexes, they will need to come from somewhere. 
+ return { + kAllTracks, + kAllAlbums, + kAlbumsByArtist, + kTracksByGenre, + }; +} + +class UpdateNotifier { + public: + UpdateNotifier(std::atomic<bool>& is_updating) : is_updating_(is_updating) { + events::Ui().Dispatch(event::UpdateStarted{}); + events::System().Dispatch(event::UpdateStarted{}); + } + ~UpdateNotifier() { + is_updating_ = false; + events::Ui().Dispatch(event::UpdateFinished{}); + events::System().Dispatch(event::UpdateFinished{}); + } + + private: + std::atomic<bool>& is_updating_; +}; + +auto Database::updateIndexes() -> void { + if (is_updating_.exchange(true)) { + return; + } + UpdateNotifier notifier{is_updating_}; + + leveldb::ReadOptions read_options; + read_options.fill_cache = true; + + // Stage 1: verify all existing tracks are still valid. + ESP_LOGI(kTag, "verifying existing tracks"); + { + uint64_t num_processed = 0; + std::unique_ptr<leveldb::Iterator> it{db_->NewIterator(read_options)}; + std::string prefix = EncodeDataPrefix(); + for (it->Seek(prefix); it->Valid() && it->key().starts_with(prefix); + it->Next()) { + num_processed++; + events::Ui().Dispatch(event::UpdateProgress{ + .stage = event::UpdateProgress::Stage::kVerifyingExistingTracks, + .val = num_processed, + }); + + std::shared_ptr<TrackData> track = ParseDataValue(it->value()); + if (!track) { + // The value was malformed. Drop this record. 
+ ESP_LOGW(kTag, "dropping malformed metadata"); + db_->Delete(leveldb::WriteOptions(), it->key()); + continue; + } + + if (track->is_tombstoned) { + ESP_LOGW(kTag, "skipping tombstoned %lx", track->id); + continue; + } + + FRESULT res; + FILINFO info; + { + auto lock = drivers::acquire_spi(); + res = f_stat(track->filepath.c_str(), &info); + } + + std::pair<uint16_t, uint16_t> modified_at{0, 0}; + if (res == FR_OK) { + modified_at = {info.fdate, info.ftime}; + } + if (modified_at == track->modified_at) { + continue; + } else { + track->modified_at = modified_at; + } + + std::shared_ptr<TrackTags> tags = tag_parser_.ReadAndParseTags( + {track->filepath.data(), track->filepath.size()}); + if (!tags || tags->encoding() == Container::kUnsupported) { + // We couldn't read the tags for this track. Either they were + // malformed, or perhaps the file is missing. Either way, tombstone + // this record. + ESP_LOGW(kTag, "entombing missing #%lx", track->id); + dbRemoveIndexes(track); + track->is_tombstoned = true; + dbPutTrackData(*track); + db_->Delete(leveldb::WriteOptions{}, EncodePathKey(track->filepath)); + continue; + } + + // At this point, we know that the track still exists in its original + // location. All that's left to do is update any metadata about it. + + uint64_t new_hash = tags->Hash(); + if (new_hash != track->tags_hash) { + // This track's tags have changed. Since the filepath is exactly the + // same, we assume this is a legitimate correction. Update the + // database. + ESP_LOGI(kTag, "updating hash (%llx -> %llx)", track->tags_hash, + new_hash); + dbRemoveIndexes(track); + + track->tags_hash = new_hash; + dbIngestTagHashes(*tags, track->individual_tag_hashes); + dbPutTrackData(*track); + dbPutHash(new_hash, track->id); + } + } + } + + // Stage 2: search for newly added files. 
+ ESP_LOGI(kTag, "scanning for new tracks"); + uint64_t num_processed = 0; + file_gatherer_.FindFiles("", [&](std::string_view path, const FILINFO& info) { + num_processed++; + events::Ui().Dispatch(event::UpdateProgress{ + .stage = event::UpdateProgress::Stage::kScanningForNewTracks, + .val = num_processed, + }); + + std::string unused; + if (db_->Get(read_options, EncodePathKey(path), &unused).ok()) { + // This file is already in the database; skip it. + return; + } + + std::shared_ptr<TrackTags> tags = tag_parser_.ReadAndParseTags(path); + if (!tags || tags->encoding() == Container::kUnsupported) { + // No parseable tags; skip this fiile. + return; + } + + // Check for any existing record with the same hash. + uint64_t hash = tags->Hash(); + std::string key = EncodeHashKey(hash); + std::optional<TrackId> existing_hash; + std::string raw_entry; + if (db_->Get(leveldb::ReadOptions(), key, &raw_entry).ok()) { + existing_hash = ParseHashValue(raw_entry); + } + + std::pair<uint16_t, uint16_t> modified{info.fdate, info.ftime}; + if (!existing_hash) { + // We've never met this track before! Or we have, but the entry is + // malformed. Either way, record this as a new track. + TrackId id = dbMintNewTrackId(); + ESP_LOGI(kTag, "recording new 0x%lx", id); + + auto data = std::make_shared<TrackData>(); + data->id = id; + data->filepath = path; + data->tags_hash = hash; + data->modified_at = modified; + dbIngestTagHashes(*tags, data->individual_tag_hashes); + + dbPutTrackData(*data); + dbPutHash(hash, id); + auto t = std::make_shared<Track>(data, tags); + dbCreateIndexesForTrack(*t); + db_->Put(leveldb::WriteOptions{}, EncodePathKey(path), + TrackIdToBytes(id)); + return; + } + + std::shared_ptr<TrackData> existing_data = dbGetTrackData(*existing_hash); + if (!existing_data) { + // We found a hash that matches, but there's no data record? Weird. 
+ auto new_data = std::make_shared<TrackData>(); + new_data->id = dbMintNewTrackId(); + new_data->filepath = path; + new_data->tags_hash = hash; + new_data->modified_at = modified; + dbIngestTagHashes(*tags, new_data->individual_tag_hashes); + dbPutTrackData(*new_data); + auto t = std::make_shared<Track>(new_data, tags); + dbCreateIndexesForTrack(*t); + db_->Put(leveldb::WriteOptions{}, EncodePathKey(path), + TrackIdToBytes(new_data->id)); + return; + } + + if (existing_data->is_tombstoned) { + ESP_LOGI(kTag, "exhuming track %lu", existing_data->id); + existing_data->is_tombstoned = false; + existing_data->modified_at = modified; + dbPutTrackData(*existing_data); + auto t = std::make_shared<Track>(existing_data, tags); + dbCreateIndexesForTrack(*t); + db_->Put(leveldb::WriteOptions{}, EncodePathKey(path), + TrackIdToBytes(existing_data->id)); + } else if (existing_data->filepath != + std::pmr::string{path.data(), path.size()}) { + ESP_LOGW(kTag, "hash collision: %s, %s, %s", + tags->title().value_or("no title").c_str(), + tags->artist().value_or("no artist").c_str(), + tags->album().value_or("no album").c_str()); + } + }); +} + +auto Database::isUpdating() -> bool { + return is_updating_; +} + +auto Database::dbMintNewTrackId() -> TrackId { + TrackId next_id = 1; + std::string val; + auto status = db_->Get(leveldb::ReadOptions(), kKeyTrackId, &val); + if (status.ok()) { + next_id = BytesToTrackId(val).value_or(next_id); + } else if (!status.IsNotFound()) { + // TODO(jacqueline): Handle this more. 
+ ESP_LOGE(kTag, "failed to get next track id"); + } + + if (!db_->Put(leveldb::WriteOptions(), kKeyTrackId, + TrackIdToBytes(next_id + 1)) + .ok()) { + ESP_LOGE(kTag, "failed to write next track id"); + } + + return next_id; +} + +auto Database::dbEntomb(TrackId id, uint64_t hash) -> void { + std::string key = EncodeHashKey(hash); + std::string val = EncodeHashValue(id); + if (!db_->Put(leveldb::WriteOptions(), key, val).ok()) { + ESP_LOGE(kTag, "failed to entomb #%llx (id #%lx)", hash, id); + } +} + +auto Database::dbPutTrackData(const TrackData& s) -> void { + std::string key = EncodeDataKey(s.id); + std::string val = EncodeDataValue(s); + if (!db_->Put(leveldb::WriteOptions(), key, val).ok()) { + ESP_LOGE(kTag, "failed to write data for #%lx", s.id); + } +} + +auto Database::dbGetTrackData(TrackId id) -> std::shared_ptr<TrackData> { + std::string key = EncodeDataKey(id); + std::string raw_val; + if (!db_->Get(leveldb::ReadOptions(), key, &raw_val).ok()) { + ESP_LOGW(kTag, "no key found for #%lx", id); + return {}; + } + return ParseDataValue(raw_val); +} + +auto Database::dbPutHash(const uint64_t& hash, TrackId i) -> void { + std::string key = EncodeHashKey(hash); + std::string val = EncodeHashValue(i); + if (!db_->Put(leveldb::WriteOptions(), key, val).ok()) { + ESP_LOGE(kTag, "failed to write hash for #%lx", i); + } +} + +auto Database::dbGetHash(const uint64_t& hash) -> std::optional<TrackId> { + std::string key = EncodeHashKey(hash); + std::string raw_val; + if (!db_->Get(leveldb::ReadOptions(), key, &raw_val).ok()) { + ESP_LOGW(kTag, "no key found for hash #%llx", hash); + return {}; + } + return ParseHashValue(raw_val); +} + +auto Database::dbCreateIndexesForTrack(const Track& track) -> void { + for (const IndexInfo& index : getIndexes()) { + leveldb::WriteBatch writes; + auto entries = Index(collator_, index, track); + for (const auto& it : entries) { + writes.Put(EncodeIndexKey(it.first), + {it.second.data(), it.second.size()}); + } + 
db_->Write(leveldb::WriteOptions(), &writes); + } +} + +auto Database::dbRemoveIndexes(std::shared_ptr<TrackData> data) -> void { + auto tags = dbRecoverTagsFromHashes(data->individual_tag_hashes); + if (!tags) { + return; + } + Track track{data, tags}; + for (const IndexInfo& index : getIndexes()) { + auto entries = Index(collator_, index, track); + for (auto it = entries.rbegin(); it != entries.rend(); it++) { + auto key = EncodeIndexKey(it->first); + auto status = db_->Delete(leveldb::WriteOptions{}, key); + if (!status.ok()) { + return; + } + + std::unique_ptr<leveldb::Iterator> cursor{db_->NewIterator({})}; + cursor->Seek(key); + cursor->Prev(); + + auto prev_key = ParseIndexKey(cursor->key()); + if (prev_key && prev_key->header == it->first.header) { + break; + } + + cursor->Next(); + auto next_key = ParseIndexKey(cursor->key()); + if (next_key && next_key->header == it->first.header) { + break; + } + } + } +} + +auto Database::dbIngestTagHashes(const TrackTags& tags, + std::pmr::unordered_map<Tag, uint64_t>& out) + -> void { + leveldb::WriteBatch batch{}; + for (const auto& tag : tags.allPresent()) { + auto val = tags.get(tag); + auto hash = tagHash(val); + batch.Put(EncodeTagHashKey(hash), tagToString(val)); + out[tag] = hash; + } + db_->Write(leveldb::WriteOptions{}, &batch); +} + +auto Database::dbRecoverTagsFromHashes( + const std::pmr::unordered_map<Tag, uint64_t>& hashes) + -> std::shared_ptr<TrackTags> { + auto out = std::make_shared<TrackTags>(); + for (const auto& entry : hashes) { + std::string value; + auto res = db_->Get(leveldb::ReadOptions{}, EncodeTagHashKey(entry.second), + &value); + if (!res.ok()) { + ESP_LOGI(kTag, "failed to retrieve tag!"); + continue; + } + out->set(entry.first, {value.data(), value.size()}); + } + return out; +} + +auto seekToOffset(leveldb::Iterator* it, int offset) { + while (it->Valid() && offset != 0) { + if (offset < 0) { + it->Prev(); + offset++; + } else { + it->Next(); + offset--; + } + } +} + +auto 
Database::getRecord(const SearchKey& c) + -> std::optional<std::pair<std::pmr::string, Record>> { + std::unique_ptr<leveldb::Iterator> it{ + db_->NewIterator(leveldb::ReadOptions{})}; + + it->Seek(c.startKey()); + seekToOffset(it.get(), c.offset); + if (!it->Valid() || !it->key().starts_with(std::string_view{c.prefix})) { + return {}; + } + + std::optional<IndexKey> key = ParseIndexKey(it->key()); + if (!key) { + ESP_LOGW(kTag, "parsing index key failed"); + return {}; + } + + return std::make_pair(std::pmr::string{it->key().data(), it->key().size(), + &memory::kSpiRamResource}, + Record{*key, it->value()}); +} + +auto Database::countRecords(const SearchKey& c) -> size_t { + std::unique_ptr<leveldb::Iterator> it{ + db_->NewIterator(leveldb::ReadOptions{})}; + + it->Seek(c.startKey()); + seekToOffset(it.get(), c.offset); + if (!it->Valid() || !it->key().starts_with(std::string_view{c.prefix})) { + return {}; + } + + size_t count = 0; + while (it->Valid() && it->key().starts_with(std::string_view{c.prefix})) { + it->Next(); + count++; + } + + return count; +} + +auto SearchKey::startKey() const -> std::string_view { + if (key) { + return *key; + } + return prefix; +} + +Record::Record(const IndexKey& key, const leveldb::Slice& t) + : text_(t.data(), t.size(), &memory::kSpiRamResource) { + if (key.track) { + contents_ = *key.track; + } else { + contents_ = ExpandHeader(key.header, key.item); + } +} + +auto Record::text() const -> std::string_view { + return text_; +} + +auto Record::contents() const + -> const std::variant<TrackId, IndexKey::Header>& { + return contents_; +} + +Iterator::Iterator(std::shared_ptr<Database> db, IndexId idx) + : Iterator(db, + IndexKey::Header{ + .id = idx, + .depth = 0, + .components_hash = 0, + }) {} + +Iterator::Iterator(std::shared_ptr<Database> db, const IndexKey::Header& header) + : db_(db), key_{}, current_() { + std::string prefix = EncodeIndexPrefix(header); + key_ = { + .prefix = {prefix.data(), prefix.size(), 
&memory::kSpiRamResource}, + .key = {}, + .offset = 0, + }; + iterate(key_); +} + +auto Iterator::value() const -> const std::optional<Record>& { + return current_; +} + +auto Iterator::next() -> void { + SearchKey new_key = key_; + new_key.offset = 1; + iterate(new_key); +} + +auto Iterator::prev() -> void { + SearchKey new_key = key_; + new_key.offset = -1; + iterate(new_key); +} + +auto Iterator::iterate(const SearchKey& key) -> void { + auto db = db_.lock(); + if (!db) { + ESP_LOGW(kTag, "iterate with dead db"); + return; + } + auto res = db->getRecord(key); + if (res) { + key_ = { + .prefix = key_.prefix, + .key = res->first, + .offset = 0, + }; + current_ = res->second; + } else { + key_ = key; + current_.reset(); + } +} + +auto Iterator::count() const -> size_t { + auto db = db_.lock(); + if (!db) { + ESP_LOGW(kTag, "count with dead db"); + return 0; + } + return db->countRecords(key_); +} + +TrackIterator::TrackIterator(const Iterator& it) : db_(it.db_), levels_() { + levels_.push_back(it); + next(false); +} + +auto TrackIterator::next() -> void { + next(true); +} + +auto TrackIterator::next(bool advance) -> void { + while (!levels_.empty()) { + if (advance) { + levels_.back().next(); + } + + auto& cur = levels_.back().value(); + if (!cur) { + // The current top iterator is out of tracks. Pop it, and move the parent + // to the next item. + levels_.pop_back(); + advance = true; + } else if (std::holds_alternative<IndexKey::Header>(cur->contents())) { + // This record is a branch. Push a new iterator. + auto key = std::get<IndexKey::Header>(cur->contents()); + auto db = db_.lock(); + if (!db) { + return; + } + levels_.emplace_back(db, key); + // Don't skip the first value of the new level. + advance = false; + } else if (std::holds_alternative<TrackId>(cur->contents())) { + // New record is a leaf. 
+ break; + } + } +} + +auto TrackIterator::value() const -> std::optional<TrackId> { + if (levels_.empty()) { + return {}; + } + auto cur = levels_.back().value(); + if (!cur) { + return {}; + } + if (std::holds_alternative<TrackId>(cur->contents())) { + return std::get<TrackId>(cur->contents()); + } + return {}; +} + +auto TrackIterator::count() const -> size_t { + size_t size = 0; + TrackIterator copy{*this}; + while (!copy.levels_.empty()) { + size += copy.levels_.back().count(); + copy.levels_.pop_back(); + copy.next(); + } + return size; +} + +} // namespace database diff --git a/src/tangara/database/database.hpp b/src/tangara/database/database.hpp new file mode 100644 index 00000000..35b76a13 --- /dev/null +++ b/src/tangara/database/database.hpp @@ -0,0 +1,244 @@ +/* + * Copyright 2023 jacqueline <me@jacqueline.id.au> + * + * SPDX-License-Identifier: GPL-3.0-only + */ + +#pragma once + +#include <stdint.h> +#include <sys/_stdint.h> +#include <cstdint> +#include <future> +#include <memory> +#include <optional> +#include <stack> +#include <string> +#include <utility> +#include <vector> + +#include "collation.hpp" +#include "cppbor.h" +#include "file_gatherer.hpp" +#include "index.hpp" +#include "leveldb/cache.h" +#include "leveldb/db.h" +#include "leveldb/iterator.h" +#include "leveldb/options.h" +#include "leveldb/slice.h" +#include "memory_resource.hpp" +#include "records.hpp" +#include "result.hpp" +#include "tag_parser.hpp" +#include "tasks.hpp" +#include "track.hpp" + +namespace database { + +const uint8_t kCurrentDbVersion = 6; + +struct SearchKey; +class Record; +class Iterator; + +/* + * Handle to an open database. This can be used to store large amounts of + * persistent data on the SD card, in a manner that can be retrieved later very + * quickly. + * + * A database includes a number of 'indexes'. Each index is a sorted, + * hierarchical view of all the playable tracks on the device. 
+ */ +class Database { + public: + enum DatabaseError { + ALREADY_OPEN, + FAILED_TO_OPEN, + }; + static auto Open(IFileGatherer& file_gatherer, + ITagParser& tag_parser, + locale::ICollator& collator, + tasks::WorkerPool& bg_worker) + -> cpp::result<Database*, DatabaseError>; + + static auto Destroy() -> void; + + ~Database(); + + auto schemaVersion() -> std::string; + + auto sizeOnDiskBytes() -> size_t; + + /* Adds an arbitrary record to the database. */ + auto put(const std::string& key, const std::string& val) -> void; + + /* Retrives a value previously stored with `put`. */ + auto get(const std::string& key) -> std::optional<std::string>; + + auto getTrackPath(TrackId id) -> std::optional<std::string>; + auto getTrack(TrackId id) -> std::shared_ptr<Track>; + + auto getIndexes() -> std::vector<IndexInfo>; + auto updateIndexes() -> void; + auto isUpdating() -> bool; + + // Cannot be copied or moved. + Database(const Database&) = delete; + Database& operator=(const Database&) = delete; + + private: + friend class Iterator; + + // Owned. Dumb pointers because destruction needs to be done in an explicit + // order. + leveldb::DB* db_; + leveldb::Cache* cache_; + + // Not owned. 
+ IFileGatherer& file_gatherer_; + ITagParser& tag_parser_; + locale::ICollator& collator_; + + std::atomic<bool> is_updating_; + + Database(leveldb::DB* db, + leveldb::Cache* cache, + IFileGatherer& file_gatherer, + ITagParser& tag_parser, + locale::ICollator& collator); + + auto dbMintNewTrackId() -> TrackId; + + auto dbEntomb(TrackId track, uint64_t hash) -> void; + auto dbPutTrackData(const TrackData& s) -> void; + auto dbGetTrackData(TrackId id) -> std::shared_ptr<TrackData>; + auto dbPutHash(const uint64_t& hash, TrackId i) -> void; + auto dbGetHash(const uint64_t& hash) -> std::optional<TrackId>; + + auto dbCreateIndexesForTrack(const Track& track) -> void; + auto dbRemoveIndexes(std::shared_ptr<TrackData>) -> void; + + auto dbIngestTagHashes(const TrackTags&, + std::pmr::unordered_map<Tag, uint64_t>&) -> void; + auto dbRecoverTagsFromHashes(const std::pmr::unordered_map<Tag, uint64_t>&) + -> std::shared_ptr<TrackTags>; + + auto getRecord(const SearchKey& c) + -> std::optional<std::pair<std::pmr::string, Record>>; + auto countRecords(const SearchKey& c) -> size_t; +}; + +/* + * Container for the data needed to iterate through database records. This is a + * lower-level type that the higher-level iterators are built from; most users + * outside this namespace shouldn't need to work with continuations. + */ +struct SearchKey { + std::pmr::string prefix; + /* If not given, then iteration starts from `prefix`. */ + std::optional<std::pmr::string> key; + int offset; + + auto startKey() const -> std::string_view; +}; + +/* + * A record belonging to one of the database's indexes. This may either be a + * leaf record, containing a track id, or a branch record, containing a new + * Header to retrieve results at the next level of the index. 
+ */ +class Record { + public: + Record(const IndexKey&, const leveldb::Slice&); + + Record(const Record&) = default; + Record& operator=(const Record& other) = default; + + auto text() const -> std::string_view; + auto contents() const -> const std::variant<TrackId, IndexKey::Header>&; + + private: + std::pmr::string text_; + std::variant<TrackId, IndexKey::Header> contents_; +}; + +/* + * Utility for accessing a large set of database records, one record at a time. + */ +class Iterator { + public: + Iterator(std::shared_ptr<Database>, IndexId); + Iterator(std::shared_ptr<Database>, const IndexKey::Header&); + + Iterator(const Iterator&) = default; + Iterator& operator=(const Iterator& other) = default; + + auto value() const -> const std::optional<Record>&; + std::optional<Record> operator*() const { return value(); } + + auto next() -> void; + std::optional<Record> operator++() { + next(); + return value(); + } + std::optional<Record> operator++(int) { + auto val = value(); + next(); + return val; + } + + auto prev() -> void; + std::optional<Record> operator--() { + prev(); + return value(); + } + std::optional<Record> operator--(int) { + auto val = value(); + prev(); + return val; + } + + auto count() const -> size_t; + + private: + auto iterate(const SearchKey& key) -> void; + + friend class TrackIterator; + + std::weak_ptr<Database> db_; + SearchKey key_; + std::optional<Record> current_; +}; + +class TrackIterator { + public: + TrackIterator(const Iterator&); + + TrackIterator(const TrackIterator&) = default; + TrackIterator& operator=(TrackIterator&& other) = default; + + auto value() const -> std::optional<TrackId>; + std::optional<TrackId> operator*() const { return value(); } + + auto next() -> void; + std::optional<TrackId> operator++() { + next(); + return value(); + } + std::optional<TrackId> operator++(int) { + auto val = value(); + next(); + return val; + } + + auto count() const -> size_t; + + private: + TrackIterator(std::weak_ptr<Database>); + 
auto next(bool advance) -> void; + + std::weak_ptr<Database> db_; + std::vector<Iterator> levels_; +}; + +} // namespace database diff --git a/src/tangara/database/db_events.hpp b/src/tangara/database/db_events.hpp new file mode 100644 index 00000000..a1aefc27 --- /dev/null +++ b/src/tangara/database/db_events.hpp @@ -0,0 +1,29 @@ +/* + * Copyright 2023 jacqueline <me@jacqueline.id.au> + * + * SPDX-License-Identifier: GPL-3.0-only + */ + +#pragma once + +#include <stdint.h> +#include "tinyfsm.hpp" + +namespace database { +namespace event { + +struct UpdateStarted : tinyfsm::Event {}; + +struct UpdateFinished : tinyfsm::Event {}; + +struct UpdateProgress : tinyfsm::Event { + enum class Stage { + kVerifyingExistingTracks, + kScanningForNewTracks, + }; + Stage stage; + uint64_t val; +}; + +} // namespace event +} // namespace database diff --git a/src/tangara/database/env_esp.cpp b/src/tangara/database/env_esp.cpp new file mode 100644 index 00000000..f7a5637a --- /dev/null +++ b/src/tangara/database/env_esp.cpp @@ -0,0 +1,497 @@ +/* + * Copyright 2023 jacqueline <me@jacqueline.id.au> + * + * SPDX-License-Identifier: GPL-3.0-only + */ + +#include "env_esp.hpp" + +#include <atomic> +#include <cerrno> +#include <cstddef> +#include <cstdint> +#include <cstdio> +#include <cstdlib> +#include <cstring> +#include <functional> +#include <limits> +#include <memory> +#include <mutex> +#include <queue> +#include <set> +#include <string> +#include <thread> +#include <type_traits> +#include <utility> + +#include "esp_heap_caps.h" +#include "esp_log.h" +#include "ff.h" +#include "freertos/FreeRTOS.h" +#include "freertos/portmacro.h" +#include "freertos/projdefs.h" +#include "freertos/queue.h" +#include "freertos/task.h" +#include "leveldb/env.h" +#include "leveldb/slice.h" +#include "leveldb/status.h" + +#include "spi.hpp" +#include "tasks.hpp" + +namespace leveldb { + +tasks::WorkerPool *sBackgroundThread = nullptr; + +std::string ErrToStr(FRESULT err) { + switch (err) { + case 
FR_OK: + return "FR_OK"; + case FR_DISK_ERR: + return "FR_DISK_ERR"; + case FR_INT_ERR: + return "FR_INT_ERR"; + case FR_NOT_READY: + return "FR_NOT_READY"; + case FR_NO_FILE: + return "FR_NO_FILE"; + case FR_NO_PATH: + return "FR_NO_PATH"; + case FR_INVALID_NAME: + return "FR_INVALID_NAME"; + case FR_DENIED: + return "FR_DENIED"; + case FR_EXIST: + return "FR_EXIST"; + case FR_INVALID_OBJECT: + return "FR_INVALID_OBJECT"; + case FR_WRITE_PROTECTED: + return "FR_WRITE_PROTECTED"; + case FR_INVALID_DRIVE: + return "FR_INVALID_DRIVE"; + case FR_NOT_ENABLED: + return "FR_NOT_ENABLED"; + case FR_NO_FILESYSTEM: + return "FR_NO_FILESYSTEM"; + case FR_MKFS_ABORTED: + return "FR_MKFS_ABORTED"; + case FR_TIMEOUT: + return "FR_TIMEOUT"; + case FR_LOCKED: + return "FR_LOCKED"; + case FR_NOT_ENOUGH_CORE: + return "FR_NOT_ENOUGH_CORE"; + case FR_TOO_MANY_OPEN_FILES: + return "FR_TOO_MANY_OPEN_FILES"; + case FR_INVALID_PARAMETER: + return "FR_INVALID_PARAMETER"; + default: + return "UNKNOWN"; + } +} + +Status EspError(const std::string& context, FRESULT err) { + if (err == FR_NO_FILE) { + return Status::NotFound(context, ErrToStr(err)); + } else { + return Status::IOError(context, ErrToStr(err)); + } +} + +class EspSequentialFile final : public SequentialFile { + public: + EspSequentialFile(const std::string& filename, FIL file) + : file_(file), filename_(filename) {} + ~EspSequentialFile() override { + auto lock = drivers::acquire_spi(); + f_close(&file_); + } + + Status Read(size_t n, Slice* result, char* scratch) override { + auto lock = drivers::acquire_spi(); + UINT read_size = 0; + FRESULT res = f_read(&file_, scratch, n, &read_size); + if (res != FR_OK) { // Read error. 
+ return EspError(filename_, res); + } + *result = Slice(scratch, read_size); + return Status::OK(); + } + + Status Skip(uint64_t n) override { + auto lock = drivers::acquire_spi(); + DWORD current_pos = f_tell(&file_); + FRESULT res = f_lseek(&file_, current_pos + n); + if (res != FR_OK) { + return EspError(filename_, res); + } + return Status::OK(); + } + + private: + FIL file_; + const std::string filename_; +}; + +// Implements random read access in a file using pread(). +// +// Instances of this class are thread-safe, as required by the RandomAccessFile +// API. Instances are immutable and Read() only calls thread-safe library +// functions. +class EspRandomAccessFile final : public RandomAccessFile { + public: + // The new instance takes ownership of |fd|. |fd_limiter| must outlive this + // instance, and will be used to determine if . + explicit EspRandomAccessFile(const std::string& filename) + : filename_(std::move(filename)) {} + + ~EspRandomAccessFile() override {} + + Status Read(uint64_t offset, + size_t n, + Slice* result, + char* scratch) const override { + auto lock = drivers::acquire_spi(); + FIL file; + FRESULT res = f_open(&file, filename_.c_str(), FA_READ); + if (res != FR_OK) { + return EspError(filename_, res); + } + + res = f_lseek(&file, offset); + if (res != FR_OK) { + return EspError(filename_, res); + } + + Status status; + UINT read_size = 0; + res = f_read(&file, scratch, n, &read_size); + if (res != FR_OK || read_size == 0) { + return EspError(filename_, res); + } + *result = Slice(scratch, read_size); + + f_close(&file); + + return status; + } + + private: + const std::string filename_; +}; + +// TODO(jacqueline): LevelDB expects writes to this class to be buffered in +// memory. FatFs already does in-memory buffering, but we should think about +// whether to layer more on top. 
+class EspWritableFile final : public WritableFile { + public: + EspWritableFile(std::string filename, FIL file) + : filename_(std::move(filename)), file_(file), is_open_(true) {} + + ~EspWritableFile() override { + if (is_open_) { + // Ignoring any potential errors + Close(); + } + } + + Status Append(const Slice& data) override { + if (!is_open_) { + return EspError(filename_, FR_NOT_ENABLED); + } + + auto lock = drivers::acquire_spi(); + size_t write_size = data.size(); + const char* write_data = data.data(); + + UINT bytes_written = 0; + FRESULT res = f_write(&file_, write_data, write_size, &bytes_written); + if (res != FR_OK) { + return EspError(filename_, res); + } + + return Status::OK(); + } + + Status Close() override { + auto lock = drivers::acquire_spi(); + is_open_ = false; + FRESULT res = f_close(&file_); + if (res != FR_OK) { + return EspError(filename_, res); + } + return Status::OK(); + } + + Status Flush() override { return Sync(); } + + Status Sync() override { + if (!is_open_) { + return EspError(filename_, FR_NOT_ENABLED); + } + auto lock = drivers::acquire_spi(); + FRESULT res = f_sync(&file_); + if (res != FR_OK) { + return EspError(filename_, res); + } + return Status::OK(); + } + + private: + const std::string filename_; + FIL file_; + bool is_open_; +}; + +class EspFileLock : public FileLock { + public: + explicit EspFileLock(const std::string& filename) : filename_(filename) {} + const std::string& filename() { return filename_; } + + private: + const std::string filename_; +}; + +class EspLogger final : public Logger { + public: + explicit EspLogger(FIL file) : file_(file) {} + ~EspLogger() override { f_close(&file_); } + + void Logv(const char* format, std::va_list ap) override { + /* + std::va_list args_copy; + va_copy(args_copy, ap); +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wformat-nonliteral" + std::size_t bytes_needed = snprintf(NULL, 0, format, args_copy); + char* output = reinterpret_cast<char*>( + 
heap_caps_calloc(bytes_needed, 1, MALLOC_CAP_SPIRAM)); + snprintf(output, bytes_needed, format, args_copy); +#pragma GCC diagnostic pop + va_end(args_copy); + ESP_LOGI("LEVELDB", "%s", output); + // f_puts(output, &file_); + free(reinterpret_cast<void*>(output)); + */ + } + + private: + FIL file_; +}; + +EspEnv::~EspEnv() { + ESP_LOGE("LEVELDB", "EspEnv singleton destroyed. Unsupported behavior!"); +} + +Status EspEnv::NewSequentialFile(const std::string& filename, + SequentialFile** result) { + auto lock = drivers::acquire_spi(); + FIL file; + FRESULT res = f_open(&file, filename.c_str(), FA_READ); + if (res != FR_OK) { + *result = nullptr; + return EspError(filename, res); + } + + *result = new EspSequentialFile(filename, file); + return Status::OK(); +} + +Status EspEnv::NewRandomAccessFile(const std::string& filename, + RandomAccessFile** result) { + auto lock = drivers::acquire_spi(); + // EspRandomAccessFile doesn't try to open the file until it's needed, so + // we need to first ensure the file exists to handle the NotFound case + // correctly. 
+ FILINFO info; + FRESULT res = f_stat(filename.c_str(), &info); + if (res != FR_OK) { + *result = nullptr; + return EspError(filename, res); + } + + *result = new EspRandomAccessFile(filename); + return Status::OK(); +} + +Status EspEnv::NewWritableFile(const std::string& filename, + WritableFile** result) { + auto lock = drivers::acquire_spi(); + FIL file; + FRESULT res = f_open(&file, filename.c_str(), FA_WRITE | FA_CREATE_ALWAYS); + if (res != FR_OK) { + *result = nullptr; + return EspError(filename, res); + } + + *result = new EspWritableFile(filename, file); + return Status::OK(); +} + +Status EspEnv::NewAppendableFile(const std::string& filename, + WritableFile** result) { + auto lock = drivers::acquire_spi(); + FIL file; + FRESULT res = f_open(&file, filename.c_str(), FA_WRITE | FA_OPEN_APPEND); + if (res != FR_OK) { + *result = nullptr; + return EspError(filename, res); + } + + *result = new EspWritableFile(filename, file); + return Status::OK(); +} + +bool EspEnv::FileExists(const std::string& filename) { + auto lock = drivers::acquire_spi(); + FILINFO info; + return f_stat(filename.c_str(), &info) == FR_OK; +} + +Status EspEnv::GetChildren(const std::string& directory_path, + std::vector<std::string>* result) { + result->clear(); + + auto lock = drivers::acquire_spi(); + FF_DIR dir; + FRESULT res = f_opendir(&dir, directory_path.c_str()); + if (res != FR_OK) { + return EspError(directory_path, res); + } + + FILINFO info; + for (;;) { + res = f_readdir(&dir, &info); + if (res != FR_OK) { + return EspError(directory_path, res); + } + if (info.fname[0] == 0) { + break; + } + result->emplace_back(info.fname); + } + + res = f_closedir(&dir); + if (res != FR_OK) { + return EspError(directory_path, res); + } + + return Status::OK(); +} + +Status EspEnv::RemoveFile(const std::string& filename) { + auto lock = drivers::acquire_spi(); + FRESULT res = f_unlink(filename.c_str()); + if (res != FR_OK) { + return EspError(filename, res); + } + return Status::OK(); +} + 
+Status EspEnv::CreateDir(const std::string& dirname) { + auto lock = drivers::acquire_spi(); + FRESULT res = f_mkdir(dirname.c_str()); + if (res != FR_OK) { + return EspError(dirname, res); + } + return Status::OK(); +} + +Status EspEnv::RemoveDir(const std::string& dirname) { + return RemoveFile(dirname); +} + +Status EspEnv::GetFileSize(const std::string& filename, uint64_t* size) { + auto lock = drivers::acquire_spi(); + FILINFO info; + FRESULT res = f_stat(filename.c_str(), &info); + if (res != FR_OK) { + *size = 0; + return EspError(filename, res); + } + *size = info.fsize; + return Status::OK(); +} + +Status EspEnv::RenameFile(const std::string& from, const std::string& to) { + // Match the POSIX behaviour of replacing any existing file. + if (FileExists(to)) { + Status s = RemoveFile(to); + if (!s.ok()) { + return s; + } + } + auto lock = drivers::acquire_spi(); + FRESULT res = f_rename(from.c_str(), to.c_str()); + if (res != FR_OK) { + return EspError(from, res); + } + return Status::OK(); +} + +Status EspEnv::LockFile(const std::string& filename, FileLock** lock) { + *lock = nullptr; + + if (!locks_.Insert(filename)) { + return Status::IOError("lock " + filename, "already held by process"); + } + + *lock = new EspFileLock(filename); + return Status::OK(); +} + +Status EspEnv::UnlockFile(FileLock* lock) { + EspFileLock* posix_file_lock = static_cast<EspFileLock*>(lock); + locks_.Remove(posix_file_lock->filename()); + delete posix_file_lock; + return Status::OK(); +} + +void EspEnv::StartThread(void (*thread_main)(void* thread_main_arg), + void* thread_main_arg) { + std::thread new_thread(thread_main, thread_main_arg); + new_thread.detach(); +} + +Status EspEnv::GetTestDirectory(std::string* result) { + CreateDir("/tmp"); + *result = "/tmp"; + return Status::OK(); +} + +Status EspEnv::NewLogger(const std::string& filename, Logger** result) { + auto lock = drivers::acquire_spi(); + FIL file; + FRESULT res = f_open(&file, filename.c_str(), FA_WRITE | 
FA_OPEN_APPEND); + if (res != FR_OK) { + *result = nullptr; + return EspError(filename, res); + } + + *result = new EspLogger(file); + return Status::OK(); +} + +uint64_t EspEnv::NowMicros() { + struct timeval tv_now; + gettimeofday(&tv_now, NULL); + return (int64_t)tv_now.tv_sec * 1000000L + (int64_t)tv_now.tv_usec; +} + +void EspEnv::SleepForMicroseconds(int micros) { + vTaskDelay(pdMS_TO_TICKS(micros / 1000)); +} + +EspEnv::EspEnv() {} + +void EspEnv::Schedule( + void (*background_work_function)(void* background_work_arg), + void* background_work_arg) { + auto worker = sBackgroundThread; + if (worker) { + worker->Dispatch<void>( + [=]() { std::invoke(background_work_function, background_work_arg); }); + } +} + +} // namespace leveldb diff --git a/src/tangara/database/env_esp.hpp b/src/tangara/database/env_esp.hpp new file mode 100644 index 00000000..472a72a6 --- /dev/null +++ b/src/tangara/database/env_esp.hpp @@ -0,0 +1,143 @@ +/* + * Copyright 2023 jacqueline <me@jacqueline.id.au> + * + * SPDX-License-Identifier: GPL-3.0-only + */ + +#pragma once + +#include <memory> +#include <mutex> +#include <set> +#include <string> + +#include "leveldb/env.h" +#include "leveldb/status.h" + +#include "tasks.hpp" + +namespace leveldb { + +extern tasks::WorkerPool* sBackgroundThread; + +// Tracks the files locked by EspEnv::LockFile(). +// +// We maintain a separate set instead of relying on fcntl(F_SETLK) because +// fcntl(F_SETLK) does not provide any protection against multiple uses from the +// same process. +// +// Instances are thread-safe because all member data is guarded by a mutex. 
+class InMemoryLockTable { + public: + bool Insert(const std::string& fname) { + mu_.lock(); + bool succeeded = locked_files_.insert(fname).second; + mu_.unlock(); + return succeeded; + } + void Remove(const std::string& fname) { + mu_.lock(); + locked_files_.erase(fname); + mu_.unlock(); + } + + private: + std::mutex mu_; + std::set<std::string> locked_files_; +}; + +class EspEnv : public leveldb::Env { + public: + EspEnv(); + ~EspEnv() override; + + Status NewSequentialFile(const std::string& filename, + SequentialFile** result) override; + + Status NewRandomAccessFile(const std::string& filename, + RandomAccessFile** result) override; + + Status NewWritableFile(const std::string& filename, + WritableFile** result) override; + + Status NewAppendableFile(const std::string& filename, + WritableFile** result) override; + + bool FileExists(const std::string& filename) override; + + Status GetChildren(const std::string& directory_path, + std::vector<std::string>* result) override; + + Status RemoveFile(const std::string& filename) override; + + Status CreateDir(const std::string& dirname) override; + + Status RemoveDir(const std::string& dirname) override; + + Status GetFileSize(const std::string& filename, uint64_t* size) override; + + Status RenameFile(const std::string& from, const std::string& to) override; + + Status LockFile(const std::string& filename, FileLock** lock) override; + + Status UnlockFile(FileLock* lock) override; + + void Schedule(void (*background_work_function)(void* background_work_arg), + void* background_work_arg) override; + + void StartThread(void (*thread_main)(void* thread_main_arg), + void* thread_main_arg) override; + + Status GetTestDirectory(std::string* result) override; + + Status NewLogger(const std::string& filename, Logger** result) override; + + uint64_t NowMicros() override; + + void SleepForMicroseconds(int micros) override; + + void BackgroundThreadMain(); + + private: + InMemoryLockTable locks_; // Thread-safe. 
+}; + +} // namespace leveldb + +namespace database { + +// Wraps an Env instance whose destructor is never created. +// +// Intended usage: +// using PlatformSingletonEnv = SingletonEnv<PlatformEnv>; +// void ConfigurePosixEnv(int param) { +// PlatformSingletonEnv::AssertEnvNotInitialized(); +// // set global configuration flags. +// } +// Env* Env::Default() { +// static PlatformSingletonEnv default_env; +// return default_env.env(); +// } +template <typename EnvType> +class SingletonEnv { + public: + SingletonEnv() { + static_assert(sizeof(env_storage_) >= sizeof(EnvType), + "env_storage_ will not fit the Env"); + static_assert(alignof(decltype(env_storage_)) >= alignof(EnvType), + "env_storage_ does not meet the Env's alignment needs"); + new (&env_storage_) EnvType(); + } + ~SingletonEnv() = default; + + SingletonEnv(const SingletonEnv&) = delete; + SingletonEnv& operator=(const SingletonEnv&) = delete; + + leveldb::Env* env() { return reinterpret_cast<leveldb::Env*>(&env_storage_); } + + private: + typename std::aligned_storage<sizeof(EnvType), alignof(EnvType)>::type + env_storage_; +}; + +} // namespace database diff --git a/src/tangara/database/file_gatherer.cpp b/src/tangara/database/file_gatherer.cpp new file mode 100644 index 00000000..b7b7271e --- /dev/null +++ b/src/tangara/database/file_gatherer.cpp @@ -0,0 +1,80 @@ +/* + * Copyright 2023 jacqueline <me@jacqueline.id.au> + * + * SPDX-License-Identifier: GPL-3.0-only + */ + +#include "file_gatherer.hpp" + +#include <deque> +#include <functional> +#include <sstream> +#include <string> + +#include "ff.h" + +#include "memory_resource.hpp" +#include "spi.hpp" + +namespace database { + +static_assert(sizeof(TCHAR) == sizeof(char), "TCHAR must be CHAR"); + +auto FileGathererImpl::FindFiles( + const std::string& root, + std::function<void(std::string_view, const FILINFO&)> cb) -> void { + std::pmr::deque<std::pmr::string> to_explore{&memory::kSpiRamResource}; + to_explore.push_back({root.data(), 
root.size()}); + + while (!to_explore.empty()) { + auto next_path_str = to_explore.front(); + to_explore.pop_front(); + + const TCHAR* next_path = static_cast<const TCHAR*>(next_path_str.c_str()); + + FF_DIR dir; + FRESULT res; + { + auto lock = drivers::acquire_spi(); + res = f_opendir(&dir, next_path); + } + if (res != FR_OK) { + // TODO: log. + continue; + } + + for (;;) { + FILINFO info; + { + auto lock = drivers::acquire_spi(); + res = f_readdir(&dir, &info); + } + if (res != FR_OK || info.fname[0] == 0) { + // No more files in the directory. + break; + } else if (info.fattrib & (AM_HID | AM_SYS) || info.fname[0] == '.') { + // System or hidden file. Ignore it and move on. + continue; + } else { + std::pmr::string full_path{&memory::kSpiRamResource}; + full_path += next_path_str; + full_path += "/"; + full_path += info.fname; + + if (info.fattrib & AM_DIR) { + // This is a directory. Add it to the explore queue. + to_explore.push_back(full_path); + } else { + // This is a file! Let the callback know about it. 
+ // std::invoke(cb, full_path.str(), info); + std::invoke(cb, full_path, info); + } + } + } + + auto lock = drivers::acquire_spi(); + f_closedir(&dir); + } +} + +} // namespace database diff --git a/src/tangara/database/file_gatherer.hpp b/src/tangara/database/file_gatherer.hpp new file mode 100644 index 00000000..685bdb2c --- /dev/null +++ b/src/tangara/database/file_gatherer.hpp @@ -0,0 +1,36 @@ +/* + * Copyright 2023 jacqueline <me@jacqueline.id.au> + * + * SPDX-License-Identifier: GPL-3.0-only + */ + +#pragma once + +#include <deque> +#include <functional> +#include <sstream> +#include <string> + +#include "ff.h" + +namespace database { + +class IFileGatherer { + public: + virtual ~IFileGatherer(){}; + + virtual auto FindFiles( + const std::string& root, + std::function<void(std::string_view, const FILINFO&)> cb) + -> void = 0; +}; + +class FileGathererImpl : public IFileGatherer { + public: + virtual auto FindFiles( + const std::string& root, + std::function<void(std::string_view, const FILINFO&)> cb) + -> void override; +}; + +} // namespace database diff --git a/src/tangara/database/future_fetcher.hpp b/src/tangara/database/future_fetcher.hpp new file mode 100644 index 00000000..e8ce9729 --- /dev/null +++ b/src/tangara/database/future_fetcher.hpp @@ -0,0 +1,62 @@ +/* + * Copyright 2023 jacqueline <me@jacqueline.id.au> + * + * SPDX-License-Identifier: GPL-3.0-only + */ + +#pragma once + +#include <memory> +#include <utility> + +#include "database.hpp" + +namespace database { + +/* + * Utility to simplify waiting for a std::future to complete without blocking. + * Each instance is good for a single future, and does not directly own anything + * other than the future itself. + */ +template <typename T> +class FutureFetcher { + public: + explicit FutureFetcher(std::future<T>&& fut) + : is_consumed_(false), fut_(std::move(fut)) {} + + /* + * Returns whether or not the underlying future is still awaiting async work. 
+ */ + auto Finished() -> bool { + if (!fut_.valid()) { + return true; + } + if (fut_.wait_for(std::chrono::seconds(0)) != std::future_status::ready) { + return false; + } + return true; + } + + /* + * Returns the result of the future, and releases ownership of the underling + * resource. Will return an absent value if the future became invalid (e.g. + * the promise associated with it was destroyed.) + */ + auto Result() -> std::optional<T> { + assert(!is_consumed_); + if (is_consumed_) { + return {}; + } + is_consumed_ = true; + if (!fut_.valid()) { + return {}; + } + return fut_.get(); + } + + private: + bool is_consumed_; + std::future<T> fut_; +}; + +} // namespace database diff --git a/src/tangara/database/index.cpp b/src/tangara/database/index.cpp new file mode 100644 index 00000000..328c3b43 --- /dev/null +++ b/src/tangara/database/index.cpp @@ -0,0 +1,206 @@ +/* + * Copyright 2023 jacqueline <me@jacqueline.id.au> + * + * SPDX-License-Identifier: GPL-3.0-only + */ + +#include "index.hpp" +#include <sys/_stdint.h> + +#include <cstdint> +#include <iomanip> +#include <iostream> +#include <sstream> +#include <string> +#include <variant> +#include <vector> + +#include "collation.hpp" +#include "cppbor.h" +#include "esp_log.h" +#include "komihash.h" +#include "leveldb/write_batch.h" + +#include "records.hpp" +#include "track.hpp" + +namespace database { + +[[maybe_unused]] static const char* kTag = "index"; + +const IndexInfo kAlbumsByArtist{ + .id = 1, + .name = "Albums by Artist", + .components = {Tag::kAlbumArtist, Tag::kAlbum, Tag::kAlbumOrder}, +}; + +const IndexInfo kTracksByGenre{ + .id = 2, + .name = "Tracks by Genre", + .components = {Tag::kGenres, Tag::kTitle}, +}; + +const IndexInfo kAllTracks{ + .id = 3, + .name = "All Tracks", + .components = {Tag::kTitle}, +}; + +const IndexInfo kAllAlbums{ + .id = 4, + .name = "All Albums", + .components = {Tag::kAlbum, Tag::kAlbumOrder}, +}; + +class Indexer { + public: + Indexer(locale::ICollator& collator, const 
Track& t, const IndexInfo& idx) + : collator_(collator), track_(t), index_(idx) {} + + auto index() -> std::vector<std::pair<IndexKey, std::string>>; + + private: + auto handleLevel(const IndexKey::Header& header, + std::span<const Tag> components) -> void; + + auto handleItem(const IndexKey::Header& header, + std::variant<std::pmr::string, uint32_t> item, + std::span<const Tag> components) -> void; + + auto missing_value(Tag tag) -> TagValue { + switch (tag) { + case Tag::kTitle: + return track_.TitleOrFilename(); + case Tag::kArtist: + return "Unknown Artist"; + case Tag::kAlbum: + return "Unknown Album"; + case Tag::kAlbumArtist: + return track_.tags().artist().value_or("Unknown Artist"); + return "Unknown Album"; + case Tag::kGenres: + return std::pmr::vector<std::pmr::string>{}; + case Tag::kDisc: + return 0u; + case Tag::kTrack: + return 0u; + case Tag::kAlbumOrder: + return 0u; + } + return std::monostate{}; + } + + locale::ICollator& collator_; + const Track& track_; + const IndexInfo index_; + + std::vector<std::pair<IndexKey, std::string>> out_; +}; + +auto Indexer::index() -> std::vector<std::pair<IndexKey, std::string>> { + out_.clear(); + + IndexKey::Header root_header{ + .id = index_.id, + .depth = 0, + .components_hash = 0, + }; + handleLevel(root_header, index_.components); + + return out_; +} + +auto Indexer::handleLevel(const IndexKey::Header& header, + std::span<const Tag> components) -> void { + Tag component = components.front(); + TagValue value = track_.tags().get(component); + if (std::holds_alternative<std::monostate>(value)) { + value = missing_value(component); + } + + std::visit( + [&](auto&& arg) { + using T = std::decay_t<decltype(arg)>; + if constexpr (std::is_same_v<T, std::monostate>) { + ESP_LOGW(kTag, "dropping component without value: %s", + tagName(components.front()).c_str()); + } else if constexpr (std::is_same_v<T, std::pmr::string>) { + handleItem(header, arg, components); + } else if constexpr (std::is_same_v<T, uint32_t>) 
{ + handleItem(header, arg, components); + } else if constexpr (std::is_same_v< + T, std::span<const std::pmr::string>>) { + for (const auto& i : arg) { + handleItem(header, i, components); + } + } + }, + value); +} + +auto Indexer::handleItem(const IndexKey::Header& header, + std::variant<std::pmr::string, uint32_t> item, + std::span<const Tag> components) -> void { + IndexKey key{ + .header = header, + .item = {}, + .track = {}, + }; + std::string value; + + std::string item_text; + std::visit( + [&](auto&& arg) { + using T = std::decay_t<decltype(arg)>; + if constexpr (std::is_same_v<T, std::pmr::string>) { + value = {arg.data(), arg.size()}; + auto xfrm = collator_.Transform(value); + key.item = {xfrm.data(), xfrm.size()}; + } else if constexpr (std::is_same_v<T, uint32_t>) { + value = std::to_string(arg); + // FIXME: this sucks lol. we should just write the number directly, + // LSB-first, but then we need to be able to parse it back properly. + std::ostringstream str; + str << std::setw(8) << std::setfill('0') << arg; + std::string encoded = str.str(); + key.item = {encoded.data(), encoded.size()}; + } + }, + item); + + std::optional<IndexKey::Header> next_level; + if (components.size() == 1) { + value = track_.TitleOrFilename(); + key.track = track_.data().id; + } else { + next_level = ExpandHeader(key.header, key.item); + } + + out_.emplace_back(key, value); + + if (next_level) { + handleLevel(*next_level, components.subspan(1)); + } +} + +auto Index(locale::ICollator& c, const IndexInfo& i, const Track& t) + -> std::vector<std::pair<IndexKey, std::string>> { + Indexer indexer{c, t, i}; + return indexer.index(); +} + +auto ExpandHeader(const IndexKey::Header& header, + const std::optional<std::pmr::string>& component) + -> IndexKey::Header { + IndexKey::Header ret{header}; + ret.depth++; + if (component) { + ret.components_hash = + komihash(component->data(), component->size(), ret.components_hash); + } else { + ret.components_hash = komihash(NULL, 0, 
ret.components_hash); + } + return ret; +} + +} // namespace database diff --git a/src/tangara/database/index.hpp b/src/tangara/database/index.hpp new file mode 100644 index 00000000..45dae464 --- /dev/null +++ b/src/tangara/database/index.hpp @@ -0,0 +1,78 @@ +/* + * Copyright 2023 jacqueline <me@jacqueline.id.au> + * + * SPDX-License-Identifier: GPL-3.0-only + */ + +#pragma once + +#include <stdint.h> + +#include <cstdint> +#include <string> +#include <variant> +#include <vector> + +#include "collation.hpp" +#include "leveldb/db.h" +#include "leveldb/slice.h" + +#include "leveldb/write_batch.h" +#include "memory_resource.hpp" +#include "track.hpp" + +namespace database { + +typedef uint8_t IndexId; + +struct IndexInfo { + // Unique id for this index + IndexId id; + // Localised, user-friendly description of this index. e.g. "Albums by Artist" + // or "All Tracks". + std::pmr::string name; + // Specifier for how this index breaks down the database. + std::vector<Tag> components; +}; + +struct IndexKey { + struct Header { + // The index that this key was created for. + IndexId id; + // The number of components of IndexInfo that have already been filtered. + // For example, if an index consists of { kGenre, kArtist }, and this key + // represents an artist, then depth = 1. + std::uint8_t depth; + // The cumulative hash of all filtered components, in order. For example, if + // an index consists of { kArtist, kAlbum, kTitle }, and we are at depth = 2 + // then this may contain hash(hash("Jacqueline"), "My Cool Album"). + std::uint64_t components_hash; + + bool operator==(const Header&) const = default; + }; + Header header; + + // The filterable / selectable item that this key represents. "Jacqueline" for + // kArtist, "My Cool Album" for kAlbum, etc. + std::optional<std::pmr::string> item; + // If this is a leaf component, the track id for this record. 
+ // This could reasonably be the value for a record, but we keep it as a part + // of the key to help with disambiguation. + std::optional<TrackId> track; +}; + +auto Index(locale::ICollator&, const IndexInfo&, const Track&) + -> std::vector<std::pair<IndexKey, std::string>>; + +auto ExpandHeader(const IndexKey::Header&, + const std::optional<std::pmr::string>&) -> IndexKey::Header; + +// Predefined indexes +// TODO(jacqueline): Make these defined at runtime! :) + +extern const IndexInfo kAlbumsByArtist; +extern const IndexInfo kTracksByGenre; +extern const IndexInfo kAllTracks; +extern const IndexInfo kAllAlbums; + +} // namespace database diff --git a/src/tangara/database/records.cpp b/src/tangara/database/records.cpp new file mode 100644 index 00000000..b086be3b --- /dev/null +++ b/src/tangara/database/records.cpp @@ -0,0 +1,260 @@ +/* + * Copyright 2023 jacqueline <me@jacqueline.id.au> + * + * SPDX-License-Identifier: GPL-3.0-only + */ + +#include "records.hpp" + +#include <stdint.h> +#include <sys/_stdint.h> + +#include <functional> +#include <iomanip> +#include <iostream> +#include <memory_resource> +#include <sstream> +#include <string> +#include <vector> + +#include "cppbor.h" +#include "cppbor_parse.h" +#include "esp_log.h" + +#include "index.hpp" +#include "komihash.h" +#include "memory_resource.hpp" +#include "track.hpp" + +// As LevelDB is a key-value store, each record in the database consists of a +// key and an optional value. +// +// Values, when present, are always cbor-encoded. This is fast, compact, and +// very easy to evolve over time due to its inclusion of type information. +// +// Keys have a more complicated scheme, as for performance we rely heavily on +// LevelDB's sorted storage format. We must therefore worry about clustering of +// similar records, and the sortability of our encoding format. +// Each kind of key consists of a a single-byte prefix, then one or more +// fields separated by null (0) bytes. 
Each field may be cbor-encoded, or may +// use some bespoke encoding; it depends on whether we want to be able to sort +// by that field. +// For debugging and discussion purposes, we represent field separators +// textually as '/', and write each field as its hex encoding. e.g. a data key +// for the track with id 17 would be written as 'D / 0x11'. + +namespace database { + +[[maybe_unused]] static const char* kTag = "RECORDS"; + +static const char kPathPrefix = 'P'; +static const char kDataPrefix = 'D'; +static const char kHashPrefix = 'H'; +static const char kTagHashPrefix = 'T'; +static const char kIndexPrefix = 'I'; +static const char kFieldSeparator = '\0'; + +static constexpr auto makePrefix(char p) -> std::string { + std::string str; + str += p; + str += kFieldSeparator; + return str; +} + +auto EncodePathKey(std::string_view path) -> std::string { + std::stringstream out{}; + out << makePrefix(kPathPrefix); + out << path; + return out.str(); +} + +/* 'D/' */ +auto EncodeDataPrefix() -> std::string { + return makePrefix(kDataPrefix); +} + +/* 'D/ 0xACAB' */ +auto EncodeDataKey(const TrackId& id) -> std::string { + return EncodeDataPrefix() + TrackIdToBytes(id); +} + +auto EncodeDataValue(const TrackData& track) -> std::string { + auto* tag_hashes = new cppbor::Map{}; // Free'd by Array's dtor. 
+ for (const auto& entry : track.individual_tag_hashes) { + tag_hashes->add(cppbor::Uint{static_cast<uint32_t>(entry.first)}, + cppbor::Uint{entry.second}); + } + cppbor::Array val{ + cppbor::Uint{track.id}, + cppbor::Tstr{track.filepath}, + cppbor::Uint{track.tags_hash}, + cppbor::Bool{track.is_tombstoned}, + cppbor::Uint{track.modified_at.first}, + cppbor::Uint{track.modified_at.second}, + tag_hashes, + }; + return val.toString(); +} + +auto ParseDataValue(const leveldb::Slice& slice) -> std::shared_ptr<TrackData> { + auto [item, unused, err] = cppbor::parseWithViews( + reinterpret_cast<const uint8_t*>(slice.data()), slice.size()); + if (!item || item->type() != cppbor::ARRAY) { + return nullptr; + } + auto vals = item->asArray(); + if (vals->size() != 7 || vals->get(0)->type() != cppbor::UINT || + vals->get(1)->type() != cppbor::TSTR || + vals->get(2)->type() != cppbor::UINT || + vals->get(3)->type() != cppbor::SIMPLE || + vals->get(4)->type() != cppbor::UINT || + vals->get(5)->type() != cppbor::UINT || + vals->get(6)->type() != cppbor::MAP) { + return {}; + } + auto res = std::make_shared<TrackData>(); + res->id = vals->get(0)->asUint()->unsignedValue(); + res->filepath = vals->get(1)->asViewTstr()->view(); + res->tags_hash = vals->get(2)->asUint()->unsignedValue(); + res->is_tombstoned = vals->get(3)->asBool()->value(); + res->modified_at = std::make_pair<uint16_t, uint16_t>( + vals->get(4)->asUint()->unsignedValue(), + vals->get(5)->asUint()->unsignedValue()); + + auto tag_hashes = vals->get(6)->asMap(); + for (const auto& entry : *tag_hashes) { + auto tag = static_cast<Tag>(entry.first->asUint()->unsignedValue()); + res->individual_tag_hashes[tag] = entry.second->asUint()->unsignedValue(); + } + return res; +} + +/* 'H/ 0xBEEF' */ +auto EncodeHashKey(const uint64_t& hash) -> std::string { + return makePrefix(kHashPrefix) + cppbor::Uint{hash}.toString(); +} + +auto ParseHashValue(const leveldb::Slice& slice) -> std::optional<TrackId> { + return 
BytesToTrackId({slice.data(), slice.size()}); +} + +auto EncodeHashValue(TrackId id) -> std::string { + return TrackIdToBytes(id); +} + +/* 'T/ 0xBEEF' */ +auto EncodeTagHashKey(const uint64_t& hash) -> std::string { + return makePrefix(kTagHashPrefix) + cppbor::Uint{hash}.toString(); +} + +/* 'I/' */ +auto EncodeAllIndexesPrefix() -> std::string { + return makePrefix(kIndexPrefix); +} + +auto EncodeIndexPrefix(const IndexKey::Header& header) -> std::string { + std::ostringstream out; + out << makePrefix(kIndexPrefix); + cppbor::Array val{ + cppbor::Uint{header.id}, + cppbor::Uint{header.depth}, + cppbor::Uint{header.components_hash}, + }; + out << val.toString() << kFieldSeparator; + return out.str(); +} + +/* + * 'I/0xa2/0x686921/0xb9' + * ^ --- trailer + * ^ --- component ("hi!") + * ^ -------- header + * + * The components *must* be encoded in a way that is easy to sort + * lexicographically. The header and footer do not have this restriction, so + * cbor is fine. + * + * We store grouping information within the header; which index, filtered + * components. We store disambiguation information in the trailer; just a track + * id for now, but could reasonably be something like 'release year' as well. + */ +auto EncodeIndexKey(const IndexKey& key) -> std::string { + std::ostringstream out{}; + + out << EncodeIndexPrefix(key.header); + + // The component should already be UTF-8 encoded, so just write it. 
+ if (key.item) { + out << *key.item << kFieldSeparator; + } + + if (key.track) { + out << TrackIdToBytes(*key.track); + } + + return out.str(); +} + +auto ParseIndexKey(const leveldb::Slice& slice) -> std::optional<IndexKey> { + IndexKey result{}; + + auto prefix = EncodeAllIndexesPrefix(); + if (!slice.starts_with(prefix)) { + return {}; + } + + std::string key_data = slice.ToString().substr(prefix.size()); + auto [key, end_of_key, err] = cppbor::parseWithViews( + reinterpret_cast<const uint8_t*>(key_data.data()), key_data.size()); + if (!key || key->type() != cppbor::ARRAY) { + return {}; + } + auto as_array = key->asArray(); + if (as_array->size() != 3 || as_array->get(0)->type() != cppbor::UINT || + as_array->get(1)->type() != cppbor::UINT || + as_array->get(2)->type() != cppbor::UINT) { + return {}; + } + result.header.id = as_array->get(0)->asUint()->unsignedValue(); + result.header.depth = as_array->get(1)->asUint()->unsignedValue(); + result.header.components_hash = as_array->get(2)->asUint()->unsignedValue(); + + size_t header_length = + reinterpret_cast<const char*>(end_of_key) - key_data.data(); + + if (header_length == 0 || header_length >= key_data.size()) { + return {}; + } + + std::istringstream in(key_data.substr(header_length + 1)); + std::stringbuf buffer{}; + + in.get(buffer, kFieldSeparator); + if (buffer.str().size() > 0) { + result.item = buffer.str(); + } + + buffer = {}; + in.get(buffer); + std::string id_str = buffer.str(); + if (id_str.size() > 1) { + result.track = BytesToTrackId(id_str.substr(1)); + } + + return result; +} + +auto TrackIdToBytes(TrackId id) -> std::string { + return cppbor::Uint{id}.toString(); +} + +auto BytesToTrackId(std::span<const char> bytes) -> std::optional<TrackId> { + auto [res, unused, err] = cppbor::parse( + reinterpret_cast<const uint8_t*>(bytes.data()), bytes.size()); + if (!res || res->type() != cppbor::UINT) { + return {}; + } + return res->asUint()->unsignedValue(); +} + +} // namespace database diff 
--git a/src/tangara/database/records.hpp b/src/tangara/database/records.hpp new file mode 100644 index 00000000..3ca68fea --- /dev/null +++ b/src/tangara/database/records.hpp @@ -0,0 +1,85 @@ +/* + * Copyright 2023 jacqueline <me@jacqueline.id.au> + * + * SPDX-License-Identifier: GPL-3.0-only + */ + +#pragma once + +#include <stdint.h> + +#include <string> +#include <variant> +#include <vector> + +#include "leveldb/db.h" +#include "leveldb/slice.h" + +#include "index.hpp" +#include "memory_resource.hpp" +#include "track.hpp" + +namespace database { + +auto EncodePathKey(std::string_view path) -> std::string; + +/* + * Returns the prefix added to every TrackData key. This can be used to iterate + * over every data record in the database. + */ +auto EncodeDataPrefix() -> std::string; + +/* Encodes a data key for a track with the specified id. */ +auto EncodeDataKey(const TrackId& id) -> std::string; + +/* + * Encodes a TrackData instance into bytes, in preparation for storing it within + * the database. This encoding is consistent, and will remain stable over time. + */ +auto EncodeDataValue(const TrackData& track) -> std::string; + +/* + * Parses bytes previously encoded via EncodeDataValue back into a TrackData. + * May return nullopt if parsing fails. + */ +auto ParseDataValue(const leveldb::Slice& slice) -> std::shared_ptr<TrackData>; + +/* Encodes a hash key for the specified hash. */ +auto EncodeHashKey(const uint64_t& hash) -> std::string; + +/* + * Encodes a hash value (at this point just a track id) into bytes, in + * preparation for storing within the database. This encoding is consistent, and + * will remain stable over time. + */ +auto EncodeHashValue(TrackId id) -> std::string; + +/* Encodes a hash key for the specified hash. */ +auto EncodeTagHashKey(const uint64_t& hash) -> std::string; + +/* + * Parses bytes previously encoded via EncodeHashValue back into a track id. May + * return nullopt if parsing fails. 
+ */ +auto ParseHashValue(const leveldb::Slice&) -> std::optional<TrackId>; + +/* Encodes a prefix that matches all index keys, of all ids and depths. */ +auto EncodeAllIndexesPrefix() -> std::string; + +/* + */ +auto EncodeIndexPrefix(const IndexKey::Header&) -> std::string; + +auto EncodeIndexKey(const IndexKey&) -> std::string; +auto ParseIndexKey(const leveldb::Slice&) -> std::optional<IndexKey>; + +/* Encodes a TrackId as bytes. */ +auto TrackIdToBytes(TrackId id) -> std::string; + +/* + * Converts a track id encoded via TrackIdToBytes back into a TrackId. May + * return nullopt if parsing fails. + */ +auto BytesToTrackId(std::span<const char> bytes) -> std::optional<TrackId>; + +} // namespace database diff --git a/src/tangara/database/tag_parser.cpp b/src/tangara/database/tag_parser.cpp new file mode 100644 index 00000000..cbcbdcb5 --- /dev/null +++ b/src/tangara/database/tag_parser.cpp @@ -0,0 +1,208 @@ +/* + * Copyright 2023 jacqueline <me@jacqueline.id.au> + * + * SPDX-License-Identifier: GPL-3.0-only + */ + +#include "tag_parser.hpp" + +#include <cstdint> +#include <cstdlib> +#include <iomanip> +#include <mutex> + +#include "esp_log.h" +#include "ff.h" +#include "spi.hpp" +#include "tags.h" + +#include "memory_resource.hpp" + +namespace database { + +static auto convert_tag(int tag) -> std::optional<Tag> { + switch (tag) { + case Ttitle: + return Tag::kTitle; + case Tartist: + return Tag::kArtist; + case Talbumartist: + return Tag::kAlbumArtist; + case Talbum: + return Tag::kAlbum; + case Ttrack: + return Tag::kTrack; + case Tgenre: + return Tag::kGenres; + default: + return {}; + } +} + +namespace libtags { + +struct Aux { + FIL file; + FILINFO info; + TrackTags* tags; +}; + +static int read(Tagctx* ctx, void* buf, int cnt) { + Aux* aux = reinterpret_cast<Aux*>(ctx->aux); + if (f_eof(&aux->file)) { + return 0; + } + UINT bytes_read; + if (f_read(&aux->file, buf, cnt, &bytes_read) != FR_OK) { + return -1; + } + return bytes_read; +} + +static int 
seek(Tagctx* ctx, int offset, int whence) { + Aux* aux = reinterpret_cast<Aux*>(ctx->aux); + FRESULT res; + if (whence == 0) { + // Seek from the start of the file. This is f_lseek's behaviour. + res = f_lseek(&aux->file, offset); + } else if (whence == 1) { + // Seek from current offset. + res = f_lseek(&aux->file, aux->file.fptr + offset); + } else if (whence == 2) { + // Seek from the end of the file + res = f_lseek(&aux->file, aux->info.fsize + offset); + } else { + return -1; + } + if (res != FR_OK) { + return -1; + } + return aux->file.fptr; +} + +static void tag(Tagctx* ctx, + int t, + const char* k, + const char* v, + int offset, + int size, + Tagread f) { + Aux* aux = reinterpret_cast<Aux*>(ctx->aux); + auto tag = convert_tag(t); + if (!tag) { + return; + } + std::pmr::string value{v, &memory::kSpiRamResource}; + if (value.empty()) { + return; + } + aux->tags->set(*tag, value); +} + +static void toc(Tagctx* ctx, int ms, int offset) {} + +} // namespace libtags + +static const std::size_t kBufSize = 1024; +[[maybe_unused]] static const char* kTag = "TAGS"; + +TagParserImpl::TagParserImpl() {} + +auto TagParserImpl::ReadAndParseTags(std::string_view path) + -> std::shared_ptr<TrackTags> { + { + std::lock_guard<std::mutex> lock{cache_mutex_}; + std::optional<std::shared_ptr<TrackTags>> cached = + cache_.Get({path.data(), path.size()}); + if (cached) { + return *cached; + } + } + + std::shared_ptr<TrackTags> tags = parseNew(path); + if (!tags) { + return {}; + } + + // There wasn't a track number found in the track's tags. Try to synthesize + // one from the filename, which will sometimes have a track number at the + // start. 
+ if (!tags->track()) { + auto slash_pos = path.find_last_of("/"); + if (slash_pos != std::string::npos && path.size() - slash_pos > 1) { + auto trunc = path.substr(slash_pos + 1); + tags->track({trunc.data(), trunc.size()}); + } + } + + { + std::lock_guard<std::mutex> lock{cache_mutex_}; + cache_.Put({path.data(), path.size(), &memory::kSpiRamResource}, tags); + } + + return tags; +} + +auto TagParserImpl::parseNew(std::string_view p) -> std::shared_ptr<TrackTags> { + std::string path{p}; + libtags::Aux aux; + auto out = TrackTags::create(); + aux.tags = out.get(); + { + auto lock = drivers::acquire_spi(); + + if (f_stat(path.c_str(), &aux.info) != FR_OK || + f_open(&aux.file, path.c_str(), FA_READ) != FR_OK) { + ESP_LOGW(kTag, "failed to open file %s", path.c_str()); + return {}; + } + } + // Fine to have this on the stack; this is only called on tasks with large + // stacks anyway, due to all the string handling. + char buf[kBufSize]; + Tagctx ctx; + ctx.read = libtags::read; + ctx.seek = libtags::seek; + ctx.tag = libtags::tag; + ctx.toc = libtags::toc; + ctx.aux = &aux; + ctx.buf = buf; + ctx.bufsz = kBufSize; + + int res; + { + auto lock = drivers::acquire_spi(); + res = tagsget(&ctx); + f_close(&aux.file); + } + + if (res != 0) { + // Parsing failed. 
+ ESP_LOGE(kTag, "tag parsing for %s failed, reason %d", path.c_str(), res); + return {}; + } + + switch (ctx.format) { + case Fmp3: + out->encoding(Container::kMp3); + break; + case Fogg: + out->encoding(Container::kOgg); + break; + case Fflac: + out->encoding(Container::kFlac); + break; + case Fwav: + out->encoding(Container::kWav); + break; + case Fopus: + out->encoding(Container::kOpus); + break; + default: + out->encoding(Container::kUnsupported); + } + + return out; +} + +} // namespace database diff --git a/src/tangara/database/tag_parser.hpp b/src/tangara/database/tag_parser.hpp new file mode 100644 index 00000000..966258b5 --- /dev/null +++ b/src/tangara/database/tag_parser.hpp @@ -0,0 +1,44 @@ +/* + * Copyright 2023 jacqueline <me@jacqueline.id.au> + * + * SPDX-License-Identifier: GPL-3.0-only + */ + +#pragma once + +#include <string> + +#include "lru_cache.hpp" +#include "track.hpp" + +namespace database { + +class ITagParser { + public: + virtual ~ITagParser() {} + virtual auto ReadAndParseTags(std::string_view path) + -> std::shared_ptr<TrackTags> = 0; +}; + +class TagParserImpl : public ITagParser { + public: + TagParserImpl(); + auto ReadAndParseTags(std::string_view path) + -> std::shared_ptr<TrackTags> override; + + private: + auto parseNew(std::string_view path) -> std::shared_ptr<TrackTags>; + + /* + * Cache of tags that have already been extracted from files. Ideally this + * cache should be slightly larger than any page sizes in the UI. + */ + std::mutex cache_mutex_; + util::LruCache<8, std::pmr::string, std::shared_ptr<TrackTags>> cache_; + + // We could also consider keeping caches of artist name -> std::string and + // similar. This hasn't been done yet, as this isn't a common workload in + // any of our UI. 
+}; + +} // namespace database diff --git a/src/tangara/database/test/CMakeLists.txt b/src/tangara/database/test/CMakeLists.txt new file mode 100644 index 00000000..a9f2cedb --- /dev/null +++ b/src/tangara/database/test/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2023 jacqueline <me@jacqueline.id.au> +# +# SPDX-License-Identifier: GPL-3.0-only + +idf_component_register( + SRCS "test_records.cpp" "test_database.cpp" + INCLUDE_DIRS "." + REQUIRES catch2 cmock database drivers fixtures) diff --git a/src/tangara/database/test/test_database.cpp b/src/tangara/database/test/test_database.cpp new file mode 100644 index 00000000..6aec9bfb --- /dev/null +++ b/src/tangara/database/test/test_database.cpp @@ -0,0 +1,210 @@ +/* + * Copyright 2023 jacqueline <me@jacqueline.id.au> + * + * SPDX-License-Identifier: GPL-3.0-only + */ + +#include "database.hpp" + +#include <stdint.h> +#include <iomanip> +#include <map> +#include <memory> +#include <string> + +#include "catch2/catch.hpp" +#include "driver_cache.hpp" +#include "esp_log.h" +#include "file_gatherer.hpp" +#include "i2c_fixture.hpp" +#include "leveldb/db.h" +#include "spi_fixture.hpp" +#include "tag_parser.hpp" +#include "track.hpp" + +namespace database { + +class TestBackends : public IFileGatherer, public ITagParser { + public: + std::map<std::pmr::string, TrackTags> tracks; + + auto MakeTrack(const std::pmr::string& path, const std::pmr::string& title) + -> void { + TrackTags tags; + tags.encoding = Encoding::kMp3; + tags.title = title; + tracks[path] = tags; + } + + auto FindFiles(const std::pmr::string& root, + std::function<void(const std::pmr::string&)> cb) + -> void override { + for (auto keyval : tracks) { + std::invoke(cb, keyval.first); + } + } + + auto ReadAndParseTags(const std::pmr::string& path, TrackTags* out) + -> bool override { + if (tracks.contains(path)) { + *out = tracks.at(path); + return true; + } + return false; + } +}; + +TEST_CASE("track database", "[integration]") { + I2CFixture i2c; + 
SpiFixture spi; + drivers::DriverCache drivers; + auto storage = drivers.AcquireStorage(); + + Database::Destroy(); + + TestBackends tracks; + auto open_res = Database::Open(&tracks, &tracks); + REQUIRE(open_res.has_value()); + std::unique_ptr<Database> db(open_res.value()); + + SECTION("empty database") { + std::unique_ptr<Result<Track>> res(db->GetTracks(10).get()); + REQUIRE(res->values().size() == 0); + } + + SECTION("add new tracks") { + tracks.MakeTrack("track1.mp3", "Track 1"); + tracks.MakeTrack("track2.wav", "Track 2"); + tracks.MakeTrack("track3.exe", "Track 3"); + + db->Update(); + + std::unique_ptr<Result<Track>> res(db->GetTracks(10).get()); + REQUIRE(res->values().size() == 3); + CHECK(*res->values().at(0).tags().title == "Track 1"); + CHECK(res->values().at(0).data().id() == 1); + CHECK(*res->values().at(1).tags().title == "Track 2"); + CHECK(res->values().at(1).data().id() == 2); + CHECK(*res->values().at(2).tags().title == "Track 3"); + CHECK(res->values().at(2).data().id() == 3); + + SECTION("update with no filesystem changes") { + db->Update(); + + std::unique_ptr<Result<Track>> new_res(db->GetTracks(10).get()); + REQUIRE(new_res->values().size() == 3); + CHECK(res->values().at(0) == new_res->values().at(0)); + CHECK(res->values().at(1) == new_res->values().at(1)); + CHECK(res->values().at(2) == new_res->values().at(2)); + } + + SECTION("update with all tracks gone") { + tracks.tracks.clear(); + + db->Update(); + + std::unique_ptr<Result<Track>> new_res(db->GetTracks(10).get()); + CHECK(new_res->values().size() == 0); + + SECTION("update with one track returned") { + tracks.MakeTrack("track2.wav", "Track 2"); + + db->Update(); + + std::unique_ptr<Result<Track>> new_res(db->GetTracks(10).get()); + REQUIRE(new_res->values().size() == 1); + CHECK(res->values().at(1) == new_res->values().at(0)); + } + } + + SECTION("update with one track gone") { + tracks.tracks.erase("track2.wav"); + + db->Update(); + + std::unique_ptr<Result<Track>> 
new_res(db->GetTracks(10).get()); + REQUIRE(new_res->values().size() == 2); + CHECK(res->values().at(0) == new_res->values().at(0)); + CHECK(res->values().at(2) == new_res->values().at(1)); + } + + SECTION("update with tags changed") { + tracks.MakeTrack("track3.exe", "The Track 3"); + + db->Update(); + + std::unique_ptr<Result<Track>> new_res(db->GetTracks(10).get()); + REQUIRE(new_res->values().size() == 3); + CHECK(res->values().at(0) == new_res->values().at(0)); + CHECK(res->values().at(1) == new_res->values().at(1)); + CHECK(*new_res->values().at(2).tags().title == "The Track 3"); + // The id should not have changed, since this was just a tag update. + CHECK(res->values().at(2).data().id() == + new_res->values().at(2).data().id()); + } + + SECTION("update with one new track") { + tracks.MakeTrack("my track.midi", "Track 1 (nightcore remix)"); + + db->Update(); + + std::unique_ptr<Result<Track>> new_res(db->GetTracks(10).get()); + REQUIRE(new_res->values().size() == 4); + CHECK(res->values().at(0) == new_res->values().at(0)); + CHECK(res->values().at(1) == new_res->values().at(1)); + CHECK(res->values().at(2) == new_res->values().at(2)); + CHECK(*new_res->values().at(3).tags().title == + "Track 1 (nightcore remix)"); + CHECK(new_res->values().at(3).data().id() == 4); + } + + SECTION("get tracks with pagination") { + std::unique_ptr<Result<Track>> res(db->GetTracks(1).get()); + + REQUIRE(res->values().size() == 1); + CHECK(res->values().at(0).data().id() == 1); + REQUIRE(res->next_page()); + + res.reset(db->GetPage(&res->next_page().value()).get()); + + REQUIRE(res->values().size() == 1); + CHECK(res->values().at(0).data().id() == 2); + REQUIRE(res->next_page()); + + res.reset(db->GetPage(&res->next_page().value()).get()); + + REQUIRE(res->values().size() == 1); + CHECK(res->values().at(0).data().id() == 3); + REQUIRE(!res->next_page()); + + SECTION("page backwards") { + REQUIRE(res->prev_page()); + + res.reset(db->GetPage(&res->prev_page().value()).get()); + + 
REQUIRE(res->values().size() == 1); + CHECK(res->values().at(0).data().id() == 2); + REQUIRE(res->prev_page()); + + res.reset(db->GetPage(&res->prev_page().value()).get()); + + REQUIRE(res->values().size() == 1); + CHECK(res->values().at(0).data().id() == 1); + REQUIRE(!res->prev_page()); + + SECTION("page forwards again") { + REQUIRE(res->next_page()); + + res.reset(db->GetPage(&res->next_page().value()).get()); + + REQUIRE(res->values().size() == 1); + CHECK(res->values().at(0).data().id() == 2); + CHECK(res->next_page()); + CHECK(res->prev_page()); + } + } + } + } +} + +} // namespace database diff --git a/src/tangara/database/test/test_records.cpp b/src/tangara/database/test/test_records.cpp new file mode 100644 index 00000000..2f59489c --- /dev/null +++ b/src/tangara/database/test/test_records.cpp @@ -0,0 +1,146 @@ +/* + * Copyright 2023 jacqueline <me@jacqueline.id.au> + * + * SPDX-License-Identifier: GPL-3.0-only + */ + +#include "records.hpp" + +#include <stdint.h> +#include <iomanip> +#include <string> + +#include "catch2/catch.hpp" + +std::pmr::string ToHex(const std::pmr::string& s) { + std::ostringstream ret; + + for (std::pmr::string::size_type i = 0; i < s.length(); ++i) + ret << std::hex << std::setfill('0') << std::setw(2) << std::uppercase + << (int)s[i]; + + return ret.str(); +} + +namespace database { + +TEST_CASE("database record encoding", "[unit]") { + SECTION("track id to bytes") { + TrackId id = 1234678; + OwningSlice as_bytes = TrackIdToBytes(id); + + SECTION("encodes correctly") { + // Purposefully a brittle test, since we need to be very careful about + // changing the way records are encoded. + REQUIRE(as_bytes.data.size() == 5); + // unsigned value + CHECK(as_bytes.data[0] == 0x1A); + // TODO(jacqueline): what's up with these failing? 
+ // 12345678 + // CHECK(as_bytes.data[1] == 0x00); + // CHECK(as_bytes.data[2] == 0x01); + // CHECK(as_bytes.data[3] == 0xE2); + // CHECK(as_bytes.data[4] == 0x40); + } + + SECTION("round-trips") { + CHECK(*BytesToTrackId(as_bytes.data) == id); + } + + SECTION("encodes compactly") { + OwningSlice small_id = TrackIdToBytes(1); + OwningSlice large_id = TrackIdToBytes(999999); + + CHECK(small_id.data.size() < large_id.data.size()); + } + + SECTION("decoding rejects garbage") { + std::optional<TrackId> res = BytesToTrackId("i'm gay"); + + CHECK(res.has_value() == false); + } + } + + SECTION("data keys") { + OwningSlice key = CreateDataKey(123456); + + REQUIRE(key.data.size() == 7); + CHECK(key.data[0] == 'D'); + CHECK(key.data[1] == '\0'); + // unsigned int + CHECK(key.data[2] == 0x1A); + // assume the int encoding is fine. + } + + SECTION("data values") { + TrackData data(123, "/some/path.mp3", 0xACAB, 69, true); + + OwningSlice enc = CreateDataValue(data); + + SECTION("encodes correctly") { + REQUIRE(enc.data.size() == 24); + + // Array, length 5 + CHECK(enc.data[0] == 0x85); + + // unsigned int, value 123 + CHECK(enc.data[1] == 0x18); + CHECK(enc.data[2] == 0x7B); + + // text, 14 chars + CHECK(enc.data[3] == 0x6E); + // ... assume the text looks okay. 
+ + // unsigned int, value 44203 + CHECK(enc.data[18] == 0x19); + CHECK(enc.data[19] == 0xAC); + CHECK(enc.data[20] == 0xAB); + + // unsigned int, value 69 + CHECK(enc.data[21] == 0x18); + CHECK(enc.data[22] == 0x45); + + // primitive 21, true + CHECK(enc.data[23] == 0xF5); + } + + SECTION("round-trips") { + CHECK(ParseDataValue(enc.slice) == data); + } + + SECTION("decoding rejects garbage") { + std::optional<TrackData> res = ParseDataValue("hi!"); + + CHECK(res.has_value() == false); + } + } + + SECTION("hash keys") { + OwningSlice key = CreateHashKey(123456); + + REQUIRE(key.data.size() == 7); + CHECK(key.data[0] == 'H'); + CHECK(key.data[1] == '\0'); + // unsigned int + CHECK(key.data[2] == 0x1A); + // assume the int encoding is fine. + } + + SECTION("hash values") { + OwningSlice val = CreateHashValue(123456); + + CHECK(val.data == TrackIdToBytes(123456).data); + + SECTION("round-trips") { + CHECK(ParseHashValue(val.slice) == 123456); + } + + SECTION("decoding rejects garbage") { + std::optional<TrackId> res = ParseHashValue("the first track :)"); + + CHECK(res.has_value() == false); + } + } +} + +} // namespace database diff --git a/src/tangara/database/track.cpp b/src/tangara/database/track.cpp new file mode 100644 index 00000000..1b1442a1 --- /dev/null +++ b/src/tangara/database/track.cpp @@ -0,0 +1,307 @@ +/* + * Copyright 2023 jacqueline <me@jacqueline.id.au> + * + * SPDX-License-Identifier: GPL-3.0-only + */ + +#include "track.hpp" + +#include <iomanip> +#include <iostream> +#include <memory_resource> +#include <span> +#include <sstream> +#include <string> + +#include "esp_log.h" +#include "komihash.h" + +#include "memory_resource.hpp" + +namespace database { + +static constexpr char kGenreDelimiters[] = ",;"; + +auto tagName(Tag t) -> std::string { + switch (t) { + case Tag::kTitle: + return "title"; + case Tag::kArtist: + return "artist"; + case Tag::kAlbum: + return "album"; + case Tag::kAlbumArtist: + return "album_artist"; + case Tag::kDisc: + 
return "disc"; + case Tag::kTrack: + return "track"; + case Tag::kAlbumOrder: + return "album_order"; + case Tag::kGenres: + return "genre"; + } + return ""; +} + +auto tagHash(const TagValue& t) -> uint64_t { + return std::visit( + [&](auto&& arg) { + using T = std::decay_t<decltype(arg)>; + if constexpr (std::is_same_v<T, std::monostate>) { + return static_cast<uint64_t>(0); + } else if constexpr (std::is_same_v<T, std::pmr::string>) { + return komihash(arg.data(), arg.size(), 0); + } else if constexpr (std::is_same_v<T, uint32_t>) { + return komihash(&arg, sizeof(arg), 0); + } else if constexpr (std::is_same_v< + T, std::span<const std::pmr::string>>) { + komihash_stream_t hash; + komihash_stream_init(&hash, 0); + for (const auto& i : arg) { + komihash_stream_update(&hash, i.data(), i.size()); + } + return komihash_stream_final(&hash); + } + }, + t); + return 0; +} + +auto tagToString(const TagValue& val) -> std::string { + return std::visit( + [&](auto&& arg) -> std::string { + using T = std::decay_t<decltype(arg)>; + if constexpr (std::is_same_v<T, std::monostate>) { + return ""; + } else if constexpr (std::is_same_v<T, std::pmr::string>) { + return {arg.data(), arg.size()}; + } else if constexpr (std::is_same_v<T, uint32_t>) { + return std::to_string(arg); + } else if constexpr (std::is_same_v< + T, std::span<const std::pmr::string>>) { + std::ostringstream builder{}; + for (const auto& str : arg) { + builder << std::string{str.data(), str.size()} << ","; + } + return builder.str(); + } + }, + val); + return ""; +} + +auto TrackTags::create() -> std::shared_ptr<TrackTags> { + return std::allocate_shared<TrackTags, + std::pmr::polymorphic_allocator<TrackTags>>( + &memory::kSpiRamResource); +} + +template <typename T> +auto valueOrMonostate(std::optional<T> t) -> TagValue { + if (t) { + return *t; + } + return std::monostate{}; +} + +auto TrackTags::get(Tag t) const -> TagValue { + switch (t) { + case Tag::kTitle: + return valueOrMonostate(title_); + case 
Tag::kArtist: + return valueOrMonostate(artist_); + case Tag::kAlbum: + return valueOrMonostate(album_); + case Tag::kAlbumArtist: + return valueOrMonostate(album_artist_); + case Tag::kDisc: + return valueOrMonostate(disc_); + case Tag::kTrack: + return valueOrMonostate(track_); + case Tag::kAlbumOrder: + return albumOrder(); + case Tag::kGenres: + return genres_; + } + return std::monostate{}; +} + +auto TrackTags::set(Tag t, std::string_view v) -> void { + switch (t) { + case Tag::kTitle: + title(v); + break; + case Tag::kArtist: + artist(v); + break; + case Tag::kAlbum: + album(v); + break; + case Tag::kAlbumArtist: + albumArtist(v); + break; + case Tag::kDisc: + disc(v); + break; + case Tag::kTrack: + track(v); + break; + case Tag::kAlbumOrder: + // This tag is derices from disc and track, and so it can't be set. + break; + case Tag::kGenres: + genres(v); + break; + } +} + +auto TrackTags::allPresent() const -> std::vector<Tag> { + std::vector<Tag> out; + auto add_if_present = [&](Tag t, auto opt) { + if (opt) { + out.push_back(t); + } + }; + add_if_present(Tag::kTitle, title_); + add_if_present(Tag::kArtist, artist_); + add_if_present(Tag::kAlbum, album_); + add_if_present(Tag::kAlbumArtist, album_artist_); + add_if_present(Tag::kDisc, disc_); + add_if_present(Tag::kTrack, track_); + add_if_present(Tag::kGenres, !genres_.empty()); + return out; +} + +auto TrackTags::title() const -> const std::optional<std::pmr::string>& { + return title_; +} + +auto TrackTags::title(std::string_view s) -> void { + title_ = s; +} + +auto TrackTags::artist() const -> const std::optional<std::pmr::string>& { + return artist_; +} + +auto TrackTags::artist(std::string_view s) -> void { + artist_ = s; +} + +auto TrackTags::album() const -> const std::optional<std::pmr::string>& { + return album_; +} + +auto TrackTags::album(std::string_view s) -> void { + album_ = s; +} + +auto TrackTags::albumArtist() const -> const std::optional<std::pmr::string>& { + return album_artist_; +} + 
+auto TrackTags::albumArtist(std::string_view s) -> void { + album_artist_ = s; +} + +auto TrackTags::disc() const -> const std::optional<uint8_t>& { + return disc_; +} + +auto TrackTags::disc(const std::string_view s) -> void { + disc_ = std::strtol(s.data(), nullptr, 10); +} + +auto TrackTags::track() const -> const std::optional<uint16_t>& { + return track_; +} + +auto TrackTags::track(const std::string_view s) -> void { + track_ = std::strtol(s.data(), nullptr, 10); +} + +auto TrackTags::albumOrder() const -> uint32_t { + return (disc_.value_or(0) << 16) | track_.value_or(0); +} + +auto TrackTags::genres() const -> std::span<const std::pmr::string> { + return genres_; +} + +auto TrackTags::genres(const std::string_view s) -> void { + genres_.clear(); + std::string src = {s.data(), s.size()}; + char* token = std::strtok(src.data(), kGenreDelimiters); + + auto trim_and_add = [this](std::string_view s) { + std::string copy = {s.data(), s.size()}; + + // Trim the left + copy.erase(copy.begin(), + std::find_if(copy.begin(), copy.end(), [](unsigned char ch) { + return !std::isspace(ch); + })); + + // Trim the right + copy.erase(std::find_if(copy.rbegin(), copy.rend(), + [](unsigned char ch) { return !std::isspace(ch); }) + .base(), + copy.end()); + + // Ignore empty strings. + if (!copy.empty()) { + genres_.push_back({copy.data(), copy.size()}); + } + }; + + if (token == NULL) { + // No delimiters found in the input. Treat this as a single genre. + trim_and_add(s); + } else { + while (token != NULL) { + // Add tokens until no more delimiters found. + trim_and_add(token); + token = std::strtok(NULL, kGenreDelimiters); + } + } +} + +/* + * Uses a komihash stream to incrementally hash tags. This lowers the + * function's memory footprint a little so that it's safe to call from any + * stack. + */ +auto TrackTags::Hash() const -> uint64_t { + // TODO(jacqueline): this function doesn't work very well for tracks with no + // tags at all. 
+ komihash_stream_t stream; + komihash_stream_init(&stream, 0); + + auto add = [&](const uint64_t& h) { + komihash_stream_update(&stream, &h, sizeof(h)); + }; + + add(tagHash(get(Tag::kTitle))); + add(tagHash(get(Tag::kArtist))); + add(tagHash(get(Tag::kAlbum))); + add(tagHash(get(Tag::kAlbumArtist))); + + // TODO: Should we be including this? + add(tagHash(get(Tag::kAlbumOrder))); + + return komihash_stream_final(&stream); +} + +auto Track::TitleOrFilename() const -> std::pmr::string { + auto title = tags().title(); + if (title) { + return *title; + } + auto start = data().filepath.find_last_of('/'); + if (start == std::pmr::string::npos) { + return data().filepath; + } + return data().filepath.substr(start + 1); +} +} // namespace database diff --git a/src/tangara/database/track.hpp b/src/tangara/database/track.hpp new file mode 100644 index 00000000..b097ab52 --- /dev/null +++ b/src/tangara/database/track.hpp @@ -0,0 +1,205 @@ +/* + * Copyright 2023 jacqueline <me@jacqueline.id.au> + * + * SPDX-License-Identifier: GPL-3.0-only + */ + +#pragma once + +#include <cstdint> + +#include <map> +#include <memory> +#include <optional> +#include <span> +#include <string> +#include <unordered_map> +#include <utility> +#include <variant> + +#include "leveldb/db.h" +#include "memory_resource.hpp" + +namespace database { + +/* + * Uniquely describes a single track within the database. This value will be + * consistent across database updates, and should ideally (but is not guaranteed + * to) endure even across a track being removed and re-added. + * + * Four billion tracks should be enough for anybody. + */ +typedef uint32_t TrackId; + +/* + * Audio file encodings that we are aware of. Used to select an appropriate + * decoder at play time. + * + * Values of this enum are persisted in this database, so it is probably never a + * good idea to change the int representation of an existing value. 
+ */
+enum class Container {
+  kUnsupported = 0,
+  kMp3 = 1,
+  kWav = 2,
+  kOgg = 3,
+  kFlac = 4,
+  kOpus = 5,
+};
+
+/*
+ * The individual tag fields that the database understands.
+ * NOTE(review): TrackData keys its per-tag hash map by Tag, so these values
+ * presumably end up persisted too — avoid renumbering existing entries;
+ * confirm against the records serialization code.
+ */
+enum class Tag {
+  kTitle = 0,
+  kArtist = 1,
+  kAlbum = 2,
+  kAlbumArtist = 3,
+  kDisc = 4,
+  kTrack = 5,
+  kAlbumOrder = 6,
+  kGenres = 7,
+};
+
+/*
+ * The value of a single tag: monostate when the tag is absent, a string, a
+ * 32-bit number, or (for genres) a non-owning view over a list of strings.
+ */
+using TagValue = std::variant<std::monostate,
+                              std::pmr::string,
+                              uint32_t,
+                              std::span<const std::pmr::string>>;
+
+auto tagName(Tag) -> std::string;
+auto tagHash(const TagValue&) -> uint64_t;
+auto tagToString(const TagValue&) -> std::string;
+
+/*
+ * Owning container for tag-related track metadata that was extracted from a
+ * file.
+ */
+class TrackTags {
+ public:
+  static auto create() -> std::shared_ptr<TrackTags>;
+
+  // Genre strings are allocated from SPI RAM; see memory_resource.hpp.
+  TrackTags()
+      : encoding_(Container::kUnsupported), genres_(&memory::kSpiRamResource) {}
+
+  // Non-copyable; instances are shared via shared_ptr (see create()).
+  TrackTags(const TrackTags& other) = delete;
+  TrackTags& operator=(TrackTags& other) = delete;
+
+  // Memberwise equality over the encoding and every tag field.
+  bool operator==(const TrackTags&) const = default;
+
+  // Generic, Tag-keyed accessors over all of the typed fields below.
+  auto get(Tag) const -> TagValue;
+  auto set(Tag, std::string_view) -> void;
+
+  // Lists every tag that currently has a value.
+  auto allPresent() const -> std::vector<Tag>;
+
+  auto encoding() const -> Container { return encoding_; };
+  auto encoding(Container e) -> void { encoding_ = e; };
+
+  // Typed getter/setter pairs for each individual tag. The setters take the
+  // tag's raw string form and parse where needed.
+  auto title() const -> const std::optional<std::pmr::string>&;
+  auto title(std::string_view) -> void;
+
+  auto artist() const -> const std::optional<std::pmr::string>&;
+  auto artist(std::string_view) -> void;
+
+  auto album() const -> const std::optional<std::pmr::string>&;
+  auto album(std::string_view) -> void;
+
+  auto albumArtist() const -> const std::optional<std::pmr::string>&;
+  auto albumArtist(std::string_view) -> void;
+
+  auto disc() const -> const std::optional<uint8_t>&;
+  auto disc(const std::string_view) -> void;
+
+  auto track() const -> const std::optional<uint16_t>&;
+  auto track(const std::string_view) -> void;
+
+  // Disc and track combined into a single sortable 32-bit key.
+  auto albumOrder() const -> uint32_t;
+
+  auto genres() const -> std::span<const std::pmr::string>;
+  auto genres(const std::string_view) -> void;
+
+  /*
+   * Returns a hash of the 'identifying' tags of this track. That is, a hash
+   * that can be used to determine if one track is likely the same as another,
+   * across things like re-encoding, re-mastering, or moving the underlying
+   * file.
+   */
+  auto Hash() const -> uint64_t;
+
+ private:
+  Container encoding_;
+
+  std::optional<std::pmr::string> title_;
+  std::optional<std::pmr::string> artist_;
+  std::optional<std::pmr::string> album_;
+  std::optional<std::pmr::string> album_artist_;
+  std::optional<uint8_t> disc_;
+  std::optional<uint16_t> track_;
+  std::pmr::vector<std::pmr::string> genres_;
+};
+
+/*
+ * Owning container for all of the metadata we store for a particular track.
+ * This includes two main kinds of metadata:
+ * 1. static(ish) attributes, such as the id, path on disk, hash of the tags
+ * 2. dynamic attributes, such as the number of times this track has been
+ * played.
+ *
+ * Because a TrackData is immutable, it is thread safe but will not reflect any
+ * changes to the dynamic attributes that may happen after it was obtained.
+ *
+ * Tracks may be 'tombstoned'; this indicates that the track is no longer
+ * present at its previous location on disk, and we do not have any existing
+ * files with a matching tags_hash. When this is the case, we ignore this
+ * TrackData for most purposes. We keep the entry in our database so that we can
+ * properly restore dynamic attributes (such as play count) if the track later
+ * re-appears on disk.
+ */
+struct TrackData {
+ public:
+  TrackData()
+      : id(0),
+        filepath(),
+        tags_hash(0),
+        individual_tag_hashes(&memory::kSpiRamResource),
+        is_tombstoned(false),
+        modified_at() {}
+
+  TrackId id;
+  std::pmr::string filepath;
+  // Identity hash of the track's tags; see TrackTags::Hash().
+  uint64_t tags_hash;
+  // Per-tag hash values, keyed by Tag; stored in SPI RAM.
+  std::pmr::unordered_map<Tag, uint64_t> individual_tag_hashes;
+  // See the class comment above: set when the file is gone from disk but we
+  // keep the entry for its dynamic attributes.
+  bool is_tombstoned;
+  // NOTE(review): presumably the filesystem's packed (date, time) pair for
+  // the file's last modification — confirm against the code that fills it.
+  std::pair<uint16_t, uint16_t> modified_at;
+
+  // NOTE(review): a user-declared (even deleted) move constructor also
+  // suppresses the implicit copy operations, so TrackData is neither
+  // copyable nor movable; instances travel via shared_ptr.
+  TrackData(TrackData&& other) = delete;
+  TrackData& operator=(TrackData& other) = delete;
+
+  // Memberwise equality across every field.
+  bool operator==(const TrackData&) const = default;
+};
+
+/*
+ * Immutable and owning combination of a track's tags and metadata.
+ *
+ * Note that instances of this class may have a fairly large memory impact, due
+ * to the large number of strings they own. Prefer to query the database again
+ * (which has its own caching layer), rather than retaining Track instances for
+ * a long time.
+ */
+class Track {
+ public:
+  Track(std::shared_ptr<TrackData>& data, std::shared_ptr<TrackTags> tags)
+      : data_(data), tags_(tags) {}
+
+  Track(Track& other) = delete;
+  Track& operator=(Track& other) = delete;
+
+  // NOTE(review): this defaulted comparison compares the shared_ptr members,
+  // so two Tracks are equal only when they share the exact same TrackData and
+  // TrackTags objects — pointer identity, not value equality of the contents.
+  // Confirm callers want identity semantics here.
+  bool operator==(const Track&) const = default;
+
+  auto data() const -> const TrackData& { return *data_; }
+  auto tags() const -> const TrackTags& { return *tags_; }
+
+  // The title tag, or the file's basename when no title is available.
+  auto TitleOrFilename() const -> std::pmr::string;
+
+ private:
+  std::shared_ptr<const TrackData> data_;
+  std::shared_ptr<TrackTags> tags_;
+};
+
+}  // namespace database
