From a9d2335e1d86b3012789a440e7f0e71033393056 Mon Sep 17 00:00:00 2001 From: jacqueline Date: Tue, 9 Jul 2024 14:41:02 +1000 Subject: Break FatfsStreamFactory's dep on ServiceLocator --- src/tangara/database/database.cpp | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'src/tangara/database/database.cpp') diff --git a/src/tangara/database/database.cpp b/src/tangara/database/database.cpp index cf1430b3..85700431 100644 --- a/src/tangara/database/database.cpp +++ b/src/tangara/database/database.cpp @@ -684,6 +684,12 @@ auto Database::countRecords(const SearchKey& c) -> size_t { return count; } +Handle::Handle(std::shared_ptr& db) : db_(db) {} + +auto Handle::lock() -> std::shared_ptr { + return db_; +} + auto SearchKey::startKey() const -> std::string_view { if (key) { return *key; -- cgit v1.2.3 From f8a3c16aad4e55bd19374c5029b4ac606b07dd7d Mon Sep 17 00:00:00 2001 From: jacqueline Date: Thu, 8 Aug 2024 10:29:46 +1000 Subject: Use one MMU page per leveldb write buffer Also drop some of the other tuning changes, since they don't seem to impact much. --- src/tangara/database/database.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/tangara/database/database.cpp') diff --git a/src/tangara/database/database.cpp b/src/tangara/database/database.cpp index 85700431..e3f3df67 100644 --- a/src/tangara/database/database.cpp +++ b/src/tangara/database/database.cpp @@ -144,10 +144,10 @@ auto Database::Open(IFileGatherer& gatherer, leveldb::Options options; options.env = sEnv.env(); - options.write_buffer_size = 4 * 1024; - options.max_file_size = 16 * 1024; + // Match the write buffer size to the MMU page size in order to + // make most efficient use of PSRAM mapping. + options.write_buffer_size = CONFIG_MMU_PAGE_SIZE; options.block_cache = cache.get(); - options.block_size = 2048; auto status = leveldb::DB::Open(options, kDbPath, &db); if (!status.ok()) { -- cgit v1.2.3 From b5dc53670a259c3fdf2d3f20f52880f2218221d7 Mon Sep 17 00:00:00 2001 From: jacqueline Date: Thu, 8 Aug 2024 12:30:49 +1000 Subject: Derive the next track id from stored track data, instead of tracking it explicitly This saves about 1ms per new track right now, but more importantly means that minting a new track id is now a single atomic operation, rather than being its own database write. This is a useful property that will come in handy in a few commits time. --- src/tangara/database/database.cpp | 56 ++++++++++++++++++++++++++++----------- 1 file changed, 40 insertions(+), 16 deletions(-) (limited to 'src/tangara/database/database.cpp') diff --git a/src/tangara/database/database.cpp b/src/tangara/database/database.cpp index e3f3df67..c543b941 100644 --- a/src/tangara/database/database.cpp +++ b/src/tangara/database/database.cpp @@ -24,6 +24,7 @@ #include "cppbor.h" #include "cppbor_parse.h" #include "database/index.hpp" +#include "debug.hpp" #include "esp_log.h" #include "esp_timer.h" #include "ff.h" @@ -60,7 +61,6 @@ static const char kKeyDbVersion[] = "schema_version"; static const char kKeyCustom[] = "U\0"; static const char kKeyCollator[] = "collator"; -static const char kKeyTrackId[] = "next_track_id"; static std::atomic sIsDbOpen(false); @@ -190,7 +190,10 @@ Database::Database(leveldb::DB* db, file_gatherer_(file_gatherer), tag_parser_(tag_parser), collator_(collator), - is_updating_(false) {} + is_updating_(false) { + dbCalculateNextTrackId(); + ESP_LOGI(kTag, "next track id is %lu", next_track_id_.load()); +} Database::~Database() { // Delete db_ first so that any outstanding background work finishes before @@ -492,24 +495,45 @@ auto Database::isUpdating() -> bool { return is_updating_; } -auto Database::dbMintNewTrackId() -> TrackId { - TrackId next_id = 1; - std::string val; - auto status = db_->Get(leveldb::ReadOptions(), kKeyTrackId, &val); - if (status.ok()) { - next_id = BytesToTrackId(val).value_or(next_id); - } else if (!status.IsNotFound()) { - // TODO(jacqueline): Handle this more. - ESP_LOGE(kTag, "failed to get next track id"); +auto Database::dbCalculateNextTrackId() -> void { + std::unique_ptr it{ + db_->NewIterator(leveldb::ReadOptions())}; + + // Track data entries are of the format 'D/trackid', where track ids are + // encoded as big-endian cbor types. They can therefore be compared through + // byte ordering, which means we can determine what the next id should be by + // looking at the larged track data record in the database. + std::string prefix = EncodeDataPrefix(); + std::string prefixPlusOne = prefix; + prefixPlusOne[prefixPlusOne.size() - 1]++; + + // Seek to just past the track data section. + it->Seek(prefixPlusOne); + if (!it->Valid()) { + next_track_id_ = 1; + return; } - if (!db_->Put(leveldb::WriteOptions(), kKeyTrackId, - TrackIdToBytes(next_id + 1)) - .ok()) { - ESP_LOGE(kTag, "failed to write next track id"); + // Go back to the last track data record. + it->Prev(); + if (!it->Valid() || !it->key().starts_with(prefix)) { + next_track_id_ = 1; + return; } - return next_id; + // Parse the track id back out of the key. + std::span key{it->key().data(), it->key().size()}; + auto id_part = key.subspan(prefix.size()); + if (id_part.empty()) { + next_track_id_ = 1; + return; + } + + next_track_id_ = BytesToTrackId(id_part).value_or(0) + 1; +} + +auto Database::dbMintNewTrackId() -> TrackId { + return next_track_id_++; } auto Database::dbEntomb(TrackId id, uint64_t hash) -> void { -- cgit v1.2.3 From 30aaefca64445efa421edb93403036d59382920f Mon Sep 17 00:00:00 2001 From: jacqueline Date: Thu, 8 Aug 2024 14:35:53 +1000 Subject: Batch up the db operations associated with adding new tracks This is ostensibly yet another 'prepare for multithreaded updates' commit, however it does actually save us another 60(!!) odd milliseconds per track. --- src/tangara/database/database.cpp | 183 ++++++++++++++++---------------------- 1 file changed, 75 insertions(+), 108 deletions(-) (limited to 'src/tangara/database/database.cpp') diff --git a/src/tangara/database/database.cpp b/src/tangara/database/database.cpp index c543b941..aec661d9 100644 --- a/src/tangara/database/database.cpp +++ b/src/tangara/database/database.cpp @@ -352,11 +352,19 @@ auto Database::updateIndexes() -> void { // We couldn't read the tags for this track. Either they were // malformed, or perhaps the file is missing. Either way, tombstone // this record. - ESP_LOGW(kTag, "entombing missing #%lx", track->id); + ESP_LOGI(kTag, "entombing missing #%lx", track->id); + + // Remove the indexes first, so that interrupted operations don't leave + // dangling index records. dbRemoveIndexes(track); + + // Do the rest of the tombstoning as one atomic write. + leveldb::WriteBatch batch; track->is_tombstoned = true; - dbPutTrackData(*track); - db_->Delete(leveldb::WriteOptions{}, EncodePathKey(track->filepath)); + batch.Put(EncodeDataKey(track->id), EncodeDataValue(*track)); + batch.Delete(EncodePathKey(track->filepath)); + + db_->Write(leveldb::WriteOptions(), &batch); continue; } @@ -370,12 +378,20 @@ auto Database::updateIndexes() -> void { // database. ESP_LOGI(kTag, "updating hash (%llx -> %llx)", track->tags_hash, new_hash); + + // Again, we remove the old index records first so has to avoid + // dangling references. dbRemoveIndexes(track); + // Atomically correct the hash + create the new index records. + leveldb::WriteBatch batch; track->tags_hash = new_hash; - dbIngestTagHashes(*tags, track->individual_tag_hashes); - dbPutTrackData(*track); - dbPutHash(new_hash, track->id); + dbIngestTagHashes(*tags, track->individual_tag_hashes, batch); + + dbCreateIndexesForTrack(*track, *tags, batch); + batch.Put(EncodeDataKey(track->id), EncodeDataValue(*track)); + batch.Put(EncodeHashKey(new_hash), EncodeHashValue(track->id)); + db_->Write(leveldb::WriteOptions(), &batch); } } } @@ -404,72 +420,56 @@ auto Database::updateIndexes() -> void { return; } - // Check for any existing record with the same hash. + // Check for any existing track with the same hash. uint64_t hash = tags->Hash(); - std::string key = EncodeHashKey(hash); - std::optional existing_hash; + std::optional existing_id; std::string raw_entry; - if (db_->Get(leveldb::ReadOptions(), key, &raw_entry).ok()) { - existing_hash = ParseHashValue(raw_entry); + if (db_->Get(leveldb::ReadOptions(), EncodeHashKey(hash), &raw_entry) + .ok()) { + existing_id = ParseHashValue(raw_entry); } - std::pair modified{info.fdate, info.ftime}; - if (!existing_hash) { - // We've never met this track before! Or we have, but the entry is - // malformed. Either way, record this as a new track. - TrackId id = dbMintNewTrackId(); - ESP_LOGD(kTag, "recording new 0x%lx", id); + std::shared_ptr data; + if (existing_id) { + // Do we have any existing data for this track? This could be the case if + // this is a tombstoned entry. In such as case, we want to reuse the + // previous TrackData so that any extra metadata is preserved. + data = dbGetTrackData(*existing_id); + if (!data) { + data = std::make_shared(); + data->id = *existing_id; + } else if (data->filepath != path) { + ESP_LOGW(kTag, "hash collision: %s, %s, %s", + tags->title().value_or("no title").c_str(), + tags->artist().value_or("no artist").c_str(), + tags->album().value_or("no album").c_str()); + // Don't commit anything if there's a hash collision, since we're + // likely to make a big mess. + return; + } + } else { num_new_tracks++; - - auto data = std::make_shared(); - data->id = id; - data->filepath = path; - data->tags_hash = hash; - data->modified_at = modified; - dbIngestTagHashes(*tags, data->individual_tag_hashes); - - dbPutTrackData(*data); - dbPutHash(hash, id); - auto t = std::make_shared(data, tags); - dbCreateIndexesForTrack(*t); - db_->Put(leveldb::WriteOptions{}, EncodePathKey(path), - TrackIdToBytes(id)); - return; + data = std::make_shared(); + data->id = dbMintNewTrackId(); } - std::shared_ptr existing_data = dbGetTrackData(*existing_hash); - if (!existing_data) { - // We found a hash that matches, but there's no data record? Weird. - auto new_data = std::make_shared(); - new_data->id = dbMintNewTrackId(); - new_data->filepath = path; - new_data->tags_hash = hash; - new_data->modified_at = modified; - dbIngestTagHashes(*tags, new_data->individual_tag_hashes); - dbPutTrackData(*new_data); - auto t = std::make_shared(new_data, tags); - dbCreateIndexesForTrack(*t); - db_->Put(leveldb::WriteOptions{}, EncodePathKey(path), - TrackIdToBytes(new_data->id)); - return; - } + // Make sure the file-based metadata on the TrackData is up to date. + data->filepath = path; + data->tags_hash = hash; + data->modified_at = {info.fdate, info.ftime}; - if (existing_data->is_tombstoned) { - ESP_LOGI(kTag, "exhuming track %lu", existing_data->id); - existing_data->is_tombstoned = false; - existing_data->modified_at = modified; - dbPutTrackData(*existing_data); - auto t = std::make_shared(existing_data, tags); - dbCreateIndexesForTrack(*t); - db_->Put(leveldb::WriteOptions{}, EncodePathKey(path), - TrackIdToBytes(existing_data->id)); - } else if (existing_data->filepath != - std::pmr::string{path.data(), path.size()}) { - ESP_LOGW(kTag, "hash collision: %s, %s, %s", - tags->title().value_or("no title").c_str(), - tags->artist().value_or("no artist").c_str(), - tags->album().value_or("no album").c_str()); - } + // Apply all the actual database changes as one atomic batch. This makes + // the whole 'new track' operation atomic, and also reduces the amount of + // lock contention when adding many tracks at once. + leveldb::WriteBatch batch; + dbIngestTagHashes(*tags, data->individual_tag_hashes, batch); + + dbCreateIndexesForTrack(*data, *tags, batch); + batch.Put(EncodeDataKey(data->id), EncodeDataValue(*data)); + batch.Put(EncodeHashKey(data->tags_hash), EncodeHashValue(data->id)); + batch.Put(EncodePathKey(path), TrackIdToBytes(data->id)); + + db_->Write(leveldb::WriteOptions(), &batch); }); uint64_t end_time = esp_timer_get_time(); @@ -536,22 +536,6 @@ auto Database::dbMintNewTrackId() -> TrackId { return next_track_id_++; } -auto Database::dbEntomb(TrackId id, uint64_t hash) -> void { - std::string key = EncodeHashKey(hash); - std::string val = EncodeHashValue(id); - if (!db_->Put(leveldb::WriteOptions(), key, val).ok()) { - ESP_LOGE(kTag, "failed to entomb #%llx (id #%lx)", hash, id); - } -} - -auto Database::dbPutTrackData(const TrackData& s) -> void { - std::string key = EncodeDataKey(s.id); - std::string val = EncodeDataValue(s); - if (!db_->Put(leveldb::WriteOptions(), key, val).ok()) { - ESP_LOGE(kTag, "failed to write data for #%lx", s.id); - } -} - auto Database::dbGetTrackData(TrackId id) -> std::shared_ptr { std::string key = EncodeDataKey(id); std::string raw_val; @@ -562,33 +546,19 @@ auto Database::dbGetTrackData(TrackId id) -> std::shared_ptr { return ParseDataValue(raw_val); } -auto Database::dbPutHash(const uint64_t& hash, TrackId i) -> void { - std::string key = EncodeHashKey(hash); - std::string val = EncodeHashValue(i); - if (!db_->Put(leveldb::WriteOptions(), key, val).ok()) { - ESP_LOGE(kTag, "failed to write hash for #%lx", i); - } -} - -auto Database::dbGetHash(const uint64_t& hash) -> std::optional { - std::string key = EncodeHashKey(hash); - std::string raw_val; - if (!db_->Get(leveldb::ReadOptions(), key, &raw_val).ok()) { - ESP_LOGW(kTag, "no key found for hash #%llx", hash); - return {}; - } - return ParseHashValue(raw_val); +auto Database::dbCreateIndexesForTrack(const Track& track, + leveldb::WriteBatch& batch) -> void { + dbCreateIndexesForTrack(track.data(), track.tags(), batch); } -auto Database::dbCreateIndexesForTrack(const Track& track) -> void { +auto Database::dbCreateIndexesForTrack(const TrackData& data, + const TrackTags& tags, + leveldb::WriteBatch& batch) -> void { for (const IndexInfo& index : getIndexes()) { - leveldb::WriteBatch writes; - auto entries = Index(collator_, index, track); + auto entries = Index(collator_, index, data, tags); for (const auto& it : entries) { - writes.Put(EncodeIndexKey(it.first), - {it.second.data(), it.second.size()}); + batch.Put(EncodeIndexKey(it.first), {it.second.data(), it.second.size()}); } - db_->Write(leveldb::WriteOptions(), &writes); } } @@ -597,9 +567,8 @@ auto Database::dbRemoveIndexes(std::shared_ptr data) -> void { if (!tags) { return; } - Track track{data, tags}; for (const IndexInfo& index : getIndexes()) { - auto entries = Index(collator_, index, track); + auto entries = Index(collator_, index, *data, *tags); for (auto it = entries.rbegin(); it != entries.rend(); it++) { auto key = EncodeIndexKey(it->first); auto status = db_->Delete(leveldb::WriteOptions{}, key); @@ -626,16 +595,14 @@ auto Database::dbRemoveIndexes(std::shared_ptr data) -> void { } auto Database::dbIngestTagHashes(const TrackTags& tags, - std::pmr::unordered_map& out) - -> void { - leveldb::WriteBatch batch{}; + std::pmr::unordered_map& out, + leveldb::WriteBatch& batch) -> void { for (const auto& tag : tags.allPresent()) { auto val = tags.get(tag); auto hash = tagHash(val); batch.Put(EncodeTagHashKey(hash), tagToString(val)); out[tag] = hash; } - db_->Write(leveldb::WriteOptions{}, &batch); } auto Database::dbRecoverTagsFromHashes( -- cgit v1.2.3 From 28cf749951a8f811606bb233efecfd36738c3c89 Mon Sep 17 00:00:00 2001 From: jacqueline Date: Thu, 8 Aug 2024 16:08:46 +1000 Subject: Make FileGatherer shaped more like a normal iterator --- src/tangara/database/database.cpp | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) (limited to 'src/tangara/database/database.cpp') diff --git a/src/tangara/database/database.cpp b/src/tangara/database/database.cpp index aec661d9..2d72fe95 100644 --- a/src/tangara/database/database.cpp +++ b/src/tangara/database/database.cpp @@ -24,6 +24,7 @@ #include "cppbor.h" #include "cppbor_parse.h" #include "database/index.hpp" +#include "database/track_finder.hpp" #include "debug.hpp" #include "esp_log.h" #include "esp_timer.h" @@ -40,7 +41,6 @@ #include "database/db_events.hpp" #include "database/env_esp.hpp" -#include "database/file_gatherer.hpp" #include "database/records.hpp" #include "database/tag_parser.hpp" #include "database/track.hpp" @@ -122,8 +122,7 @@ static auto CheckDatabase(leveldb::DB& db, locale::ICollator& col) -> bool { return true; } -auto Database::Open(IFileGatherer& gatherer, - ITagParser& parser, +auto Database::Open(ITagParser& parser, locale::ICollator& collator, tasks::WorkerPool& bg_worker) -> cpp::result { @@ -168,8 +167,7 @@ auto Database::Open(IFileGatherer& gatherer, } ESP_LOGI(kTag, "Database opened successfully"); - return new Database(db, cache.release(), gatherer, parser, - collator); + return new Database(db, cache.release(), parser, collator); }) .get(); } @@ -182,12 +180,10 @@ auto Database::Destroy() -> void { Database::Database(leveldb::DB* db, leveldb::Cache* cache, - IFileGatherer& file_gatherer, ITagParser& tag_parser, locale::ICollator& collator) : db_(db), cache_(cache), - file_gatherer_(file_gatherer), tag_parser_(tag_parser), collator_(collator), is_updating_(false) { @@ -401,7 +397,11 @@ auto Database::updateIndexes() -> void { // Stage 2: search for newly added files. ESP_LOGI(kTag, "scanning for new tracks"); uint64_t num_files = 0; - file_gatherer_.FindFiles("", [&](std::string_view path, const FILINFO& info) { + + auto track_finder = std::make_shared(""); + + FILINFO info; + while (auto path = track_finder->next(info)) { num_files++; events::Ui().Dispatch(event::UpdateProgress{ .stage = event::UpdateProgress::Stage::kScanningForNewTracks, @@ -409,15 +409,15 @@ auto Database::updateIndexes() -> void { }); std::string unused; - if (db_->Get(read_options, EncodePathKey(path), &unused).ok()) { + if (db_->Get(read_options, EncodePathKey(*path), &unused).ok()) { // This file is already in the database; skip it. - return; + continue; } - std::shared_ptr tags = tag_parser_.ReadAndParseTags(path); + std::shared_ptr tags = tag_parser_.ReadAndParseTags(*path); if (!tags || tags->encoding() == Container::kUnsupported) { // No parseable tags; skip this fiile. - return; + continue; } // Check for any existing track with the same hash. @@ -438,14 +438,14 @@ auto Database::updateIndexes() -> void { if (!data) { data = std::make_shared(); data->id = *existing_id; - } else if (data->filepath != path) { + } else if (std::string_view{data->filepath} != *path) { ESP_LOGW(kTag, "hash collision: %s, %s, %s", tags->title().value_or("no title").c_str(), tags->artist().value_or("no artist").c_str(), tags->album().value_or("no album").c_str()); // Don't commit anything if there's a hash collision, since we're // likely to make a big mess. - return; + continue; } } else { num_new_tracks++; @@ -454,7 +454,7 @@ auto Database::updateIndexes() -> void { } // Make sure the file-based metadata on the TrackData is up to date. - data->filepath = path; + data->filepath = *path; data->tags_hash = hash; data->modified_at = {info.fdate, info.ftime}; @@ -467,10 +467,10 @@ auto Database::updateIndexes() -> void { dbCreateIndexesForTrack(*data, *tags, batch); batch.Put(EncodeDataKey(data->id), EncodeDataValue(*data)); batch.Put(EncodeHashKey(data->tags_hash), EncodeHashValue(data->id)); - batch.Put(EncodePathKey(path), TrackIdToBytes(data->id)); + batch.Put(EncodePathKey(*path), TrackIdToBytes(data->id)); db_->Write(leveldb::WriteOptions(), &batch); - }); + }; uint64_t end_time = esp_timer_get_time(); -- cgit v1.2.3 From 2ad83cb2108dc55c9eb0573b0645513a1e8a61f5 Mon Sep 17 00:00:00 2001 From: jacqueline Date: Fri, 9 Aug 2024 11:43:48 +1000 Subject: Shard searching for new tracks across multiple tasks This also has the effect of breaking up the enormous 'updateIndexes' method into one call per file, which means database updates also no longer monopolise a single background task for their entire duration. avg. time per new file is now <140ms for a completely fresh database, which is pretty good i think! --- src/tangara/database/database.cpp | 253 ++++++++++++++++++++------------------ 1 file changed, 132 insertions(+), 121 deletions(-) (limited to 'src/tangara/database/database.cpp') diff --git a/src/tangara/database/database.cpp b/src/tangara/database/database.cpp index 2d72fe95..491ad8b7 100644 --- a/src/tangara/database/database.cpp +++ b/src/tangara/database/database.cpp @@ -6,9 +6,6 @@ #include "database/database.hpp" -#include -#include - #include #include #include @@ -20,12 +17,8 @@ #include #include -#include "collation.hpp" #include "cppbor.h" #include "cppbor_parse.h" -#include "database/index.hpp" -#include "database/track_finder.hpp" -#include "debug.hpp" #include "esp_log.h" #include "esp_timer.h" #include "ff.h" @@ -39,12 +32,14 @@ #include "leveldb/status.h" #include "leveldb/write_batch.h" +#include "collation.hpp" #include "database/db_events.hpp" #include "database/env_esp.hpp" +#include "database/index.hpp" #include "database/records.hpp" #include "database/tag_parser.hpp" #include "database/track.hpp" -#include "drivers/spi.hpp" +#include "database/track_finder.hpp" #include "events/event_queue.hpp" #include "memory_resource.hpp" #include "result.hpp" @@ -58,12 +53,16 @@ static SingletonEnv sEnv; static const char kDbPath[] = "/.tangara-db"; static const char kKeyDbVersion[] = "schema_version"; - static const char kKeyCustom[] = "U\0"; static const char kKeyCollator[] = "collator"; +static constexpr size_t kMaxParallelism = 2; + static std::atomic sIsDbOpen(false); +using std::placeholders::_1; +using std::placeholders::_2; + static auto CreateNewDatabase(leveldb::Options& options, locale::ICollator& col) -> leveldb::DB* { Database::Destroy(); @@ -167,7 +166,8 @@ auto Database::Open(ITagParser& parser, } ESP_LOGI(kTag, "Database opened successfully"); - return new Database(db, cache.release(), parser, collator); + return new Database(db, cache.release(), bg_worker, parser, + collator); }) .get(); } @@ -180,15 +180,20 @@ auto Database::Destroy() -> void { Database::Database(leveldb::DB* db, leveldb::Cache* cache, + tasks::WorkerPool& pool, ITagParser& tag_parser, locale::ICollator& collator) : db_(db), cache_(cache), + track_finder_( + pool, + kMaxParallelism, + std::bind(&Database::processCandidateCallback, this, _1, _2), + std::bind(&Database::indexingCompleteCallback, this)), tag_parser_(tag_parser), collator_(collator), is_updating_(false) { dbCalculateNextTrackId(); - ESP_LOGI(kTag, "next track id is %lu", next_track_id_.load()); } Database::~Database() { @@ -243,7 +248,7 @@ auto Database::get(const std::string& key) -> std::optional { } auto Database::getTrackPath(TrackId id) -> std::optional { - auto track_data = dbGetTrackData(id); + auto track_data = dbGetTrackData(leveldb::ReadOptions(), id); if (!track_data) { return {}; } @@ -251,7 +256,7 @@ auto Database::getTrackPath(TrackId id) -> std::optional { } auto Database::getTrack(TrackId id) -> std::shared_ptr { - std::shared_ptr data = dbGetTrackData(id); + std::shared_ptr data = dbGetTrackData(leveldb::ReadOptions(), id); if (!data || data->is_tombstoned) { return {}; } @@ -274,34 +279,61 @@ auto Database::getIndexes() -> std::vector { }; } -class UpdateNotifier { - public: - UpdateNotifier(std::atomic& is_updating) : is_updating_(is_updating) { - events::Ui().Dispatch(event::UpdateStarted{}); - events::System().Dispatch(event::UpdateStarted{}); +Database::UpdateTracker::UpdateTracker() + : num_old_tracks_(0), + num_new_tracks_(0), + start_time_(esp_timer_get_time()) { + events::Ui().Dispatch(event::UpdateStarted{}); + events::System().Dispatch(event::UpdateStarted{}); +} + +Database::UpdateTracker::~UpdateTracker() { + uint64_t end_time = esp_timer_get_time(); + + uint64_t time_per_old = 0; + if (num_old_tracks_) { + time_per_old = (verification_finish_time_ - start_time_) / num_old_tracks_; } - ~UpdateNotifier() { - is_updating_ = false; - events::Ui().Dispatch(event::UpdateFinished{}); - events::System().Dispatch(event::UpdateFinished{}); + uint64_t time_per_new = 0; + if (num_new_tracks_) { + time_per_new = (end_time - verification_finish_time_) / num_new_tracks_; } - private: - std::atomic& is_updating_; -}; + ESP_LOGI( + kTag, + "processed %lu old tracks and %lu new tracks in %llu seconds (%llums " + "per old, %llums per new)", + num_old_tracks_, num_new_tracks_, (end_time - start_time_) / 1000000, + time_per_old / 1000, time_per_new / 1000); + + events::Ui().Dispatch(event::UpdateFinished{}); + events::System().Dispatch(event::UpdateFinished{}); +} + +auto Database::UpdateTracker::onTrackVerified() -> void { + events::Ui().Dispatch(event::UpdateProgress{ + .stage = event::UpdateProgress::Stage::kVerifyingExistingTracks, + .val = ++num_old_tracks_, + }); +} + +auto Database::UpdateTracker::onVerificationFinished() -> void { + verification_finish_time_ = esp_timer_get_time(); +} + +auto Database::UpdateTracker::onTrackAdded() -> void { + num_new_tracks_++; +} auto Database::updateIndexes() -> void { if (is_updating_.exchange(true)) { return; } - UpdateNotifier notifier{is_updating_}; - - uint32_t num_old_tracks = 0; - uint32_t num_new_tracks = 0; - uint64_t start_time = esp_timer_get_time(); + update_tracker_ = std::make_unique(); leveldb::ReadOptions read_options; - read_options.fill_cache = true; + read_options.fill_cache = false; + read_options.verify_checksums = true; // Stage 1: verify all existing tracks are still valid. ESP_LOGI(kTag, "verifying existing tracks"); @@ -310,11 +342,7 @@ auto Database::updateIndexes() -> void { std::string prefix = EncodeDataPrefix(); for (it->Seek(prefix); it->Valid() && it->key().starts_with(prefix); it->Next()) { - num_old_tracks++; - events::Ui().Dispatch(event::UpdateProgress{ - .stage = event::UpdateProgress::Stage::kVerifyingExistingTracks, - .val = num_old_tracks, - }); + update_tracker_->onTrackVerified(); std::shared_ptr track = ParseDataValue(it->value()); if (!track) { @@ -325,7 +353,6 @@ auto Database::updateIndexes() -> void { } if (track->is_tombstoned) { - ESP_LOGW(kTag, "skipping tombstoned %lx", track->id); continue; } @@ -392,103 +419,86 @@ auto Database::updateIndexes() -> void { } } - uint64_t verify_end_time = esp_timer_get_time(); + update_tracker_->onVerificationFinished(); // Stage 2: search for newly added files. ESP_LOGI(kTag, "scanning for new tracks"); - uint64_t num_files = 0; - - auto track_finder = std::make_shared(""); + track_finder_.launch(""); +}; - FILINFO info; - while (auto path = track_finder->next(info)) { - num_files++; - events::Ui().Dispatch(event::UpdateProgress{ - .stage = event::UpdateProgress::Stage::kScanningForNewTracks, - .val = num_files, - }); +auto Database::processCandidateCallback(FILINFO& info, std::string_view path) + -> void { + leveldb::ReadOptions read_options; + read_options.fill_cache = true; + read_options.verify_checksums = false; - std::string unused; - if (db_->Get(read_options, EncodePathKey(*path), &unused).ok()) { - // This file is already in the database; skip it. - continue; - } + std::string unused; + if (db_->Get(read_options, EncodePathKey(path), &unused).ok()) { + // This file is already in the database; skip it. + return; + } - std::shared_ptr tags = tag_parser_.ReadAndParseTags(*path); - if (!tags || tags->encoding() == Container::kUnsupported) { - // No parseable tags; skip this fiile. - continue; - } + std::shared_ptr tags = tag_parser_.ReadAndParseTags(path); + if (!tags || tags->encoding() == Container::kUnsupported) { + // No parseable tags; skip this fiile. + return; + } - // Check for any existing track with the same hash. - uint64_t hash = tags->Hash(); - std::optional existing_id; - std::string raw_entry; - if (db_->Get(leveldb::ReadOptions(), EncodeHashKey(hash), &raw_entry) - .ok()) { - existing_id = ParseHashValue(raw_entry); - } + // Check for any existing track with the same hash. + uint64_t hash = tags->Hash(); + std::optional existing_id; + std::string raw_entry; + if (db_->Get(read_options, EncodeHashKey(hash), &raw_entry).ok()) { + existing_id = ParseHashValue(raw_entry); + } - std::shared_ptr data; - if (existing_id) { - // Do we have any existing data for this track? This could be the case if - // this is a tombstoned entry. In such as case, we want to reuse the - // previous TrackData so that any extra metadata is preserved. - data = dbGetTrackData(*existing_id); - if (!data) { - data = std::make_shared(); - data->id = *existing_id; - } else if (std::string_view{data->filepath} != *path) { - ESP_LOGW(kTag, "hash collision: %s, %s, %s", - tags->title().value_or("no title").c_str(), - tags->artist().value_or("no artist").c_str(), - tags->album().value_or("no album").c_str()); - // Don't commit anything if there's a hash collision, since we're - // likely to make a big mess. - continue; - } - } else { - num_new_tracks++; + std::shared_ptr data; + if (existing_id) { + // Do we have any existing data for this track? This could be the case if + // this is a tombstoned entry. In such as case, we want to reuse the + // previous TrackData so that any extra metadata is preserved. + data = dbGetTrackData(read_options, *existing_id); + if (!data) { data = std::make_shared(); - data->id = dbMintNewTrackId(); + data->id = *existing_id; + } else if (data->filepath != path && !data->is_tombstoned) { + ESP_LOGW(kTag, "hash collision: %s, %s, %s", + tags->title().value_or("no title").c_str(), + tags->artist().value_or("no artist").c_str(), + tags->album().value_or("no album").c_str()); + // Don't commit anything if there's a hash collision, since we're + // likely to make a big mess. + return; } + } else { + update_tracker_->onTrackAdded(); + data = std::make_shared(); + data->id = dbMintNewTrackId(); + } - // Make sure the file-based metadata on the TrackData is up to date. - data->filepath = *path; - data->tags_hash = hash; - data->modified_at = {info.fdate, info.ftime}; - - // Apply all the actual database changes as one atomic batch. This makes - // the whole 'new track' operation atomic, and also reduces the amount of - // lock contention when adding many tracks at once. - leveldb::WriteBatch batch; - dbIngestTagHashes(*tags, data->individual_tag_hashes, batch); - - dbCreateIndexesForTrack(*data, *tags, batch); - batch.Put(EncodeDataKey(data->id), EncodeDataValue(*data)); - batch.Put(EncodeHashKey(data->tags_hash), EncodeHashValue(data->id)); - batch.Put(EncodePathKey(*path), TrackIdToBytes(data->id)); + // Make sure the file-based metadata on the TrackData is up to date. + data->filepath = path; + data->tags_hash = hash; + data->modified_at = {info.fdate, info.ftime}; + data->is_tombstoned = false; - db_->Write(leveldb::WriteOptions(), &batch); - }; + // Apply all the actual database changes as one atomic batch. This makes + // the whole 'new track' operation atomic, and also reduces the amount of + // lock contention when adding many tracks at once. + leveldb::WriteBatch batch; + dbIngestTagHashes(*tags, data->individual_tag_hashes, batch); - uint64_t end_time = esp_timer_get_time(); + dbCreateIndexesForTrack(*data, *tags, batch); + batch.Put(EncodeDataKey(data->id), EncodeDataValue(*data)); + batch.Put(EncodeHashKey(data->tags_hash), EncodeHashValue(data->id)); + batch.Put(EncodePathKey(path), TrackIdToBytes(data->id)); - uint64_t time_per_old = 0; - if (num_old_tracks) { - time_per_old = (verify_end_time - start_time) / num_old_tracks; - } - uint64_t time_per_new = 0; - if (num_new_tracks) { - time_per_new = (end_time - verify_end_time) / num_new_tracks; - } + db_->Write(leveldb::WriteOptions(), &batch); +} - ESP_LOGI( - kTag, - "processed %lu old tracks and %lu new tracks in %llu seconds (%llums " - "per old, %llums per new)", - num_old_tracks, num_new_tracks, (end_time - start_time) / 1000000, - time_per_old / 1000, time_per_new / 1000); +auto Database::indexingCompleteCallback() -> void { + update_tracker_.reset(); + is_updating_ = false; } auto Database::isUpdating() -> bool { @@ -536,10 +546,11 @@ auto Database::dbMintNewTrackId() -> TrackId { return next_track_id_++; } -auto Database::dbGetTrackData(TrackId id) -> std::shared_ptr { +auto Database::dbGetTrackData(leveldb::ReadOptions options, TrackId id) + -> std::shared_ptr { std::string key = EncodeDataKey(id); std::string raw_val; - if (!db_->Get(leveldb::ReadOptions(), key, &raw_val).ok()) { + if (!db_->Get(options, key, &raw_val).ok()) { ESP_LOGW(kTag, "no key found for #%lx", id); return {}; } -- cgit v1.2.3 From dacf3efc45677343479b4d3ff9502504b211639a Mon Sep 17 00:00:00 2001 From: jacqueline Date: Fri, 6 Sep 2024 14:53:01 +1000 Subject: Look for music in "/Music", with the root dir as a fallback --- src/tangara/database/database.cpp | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'src/tangara/database/database.cpp') diff --git a/src/tangara/database/database.cpp b/src/tangara/database/database.cpp index 491ad8b7..64451f48 100644 --- a/src/tangara/database/database.cpp +++ b/src/tangara/database/database.cpp @@ -51,6 +51,7 @@ static SingletonEnv sEnv; [[maybe_unused]] static const char* kTag = "DB"; static const char kDbPath[] = "/.tangara-db"; +static const char kMusicPath[] = "Music"; static const char kKeyDbVersion[] = "schema_version"; static const char kKeyCustom[] = "U\0"; @@ -422,8 +423,14 @@ auto Database::updateIndexes() -> void { update_tracker_->onVerificationFinished(); // Stage 2: search for newly added files. - ESP_LOGI(kTag, "scanning for new tracks"); - track_finder_.launch(""); + std::string root; + FF_DIR dir; + if (f_opendir(&dir, kMusicPath) == FR_OK) { + f_closedir(&dir); + root = kMusicPath; + } + ESP_LOGI(kTag, "scanning for new tracks in '%s'", root.c_str()); + track_finder_.launch(root); }; auto Database::processCandidateCallback(FILINFO& info, std::string_view path) -- cgit v1.2.3