From a9d2335e1d86b3012789a440e7f0e71033393056 Mon Sep 17 00:00:00 2001
From: jacqueline <me@jacqueline.id.au>
Date: Tue, 9 Jul 2024 14:41:02 +1000
Subject: Break FatfsStreamFactory's dep on ServiceLocator

---
 src/tangara/database/database.cpp | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'src/tangara/database/database.cpp')
diff --git a/src/tangara/database/database.cpp b/src/tangara/database/database.cpp
index cf1430b3..85700431 100644
--- a/src/tangara/database/database.cpp
+++ b/src/tangara/database/database.cpp
@@ -684,6 +684,12 @@ auto Database::countRecords(const SearchKey& c) -> size_t {
   return count;
 }
 
+Handle::Handle(std::shared_ptr<Database>& db) : db_(db) {}
+
+auto Handle::lock() -> std::shared_ptr<Database> {
+  return db_;
+}
+
 auto SearchKey::startKey() const -> std::string_view {
   if (key) {
     return *key;
-- 
cgit v1.2.3


From f8a3c16aad4e55bd19374c5029b4ac606b07dd7d Mon Sep 17 00:00:00 2001
From: jacqueline <me@jacqueline.id.au>
Date: Thu, 8 Aug 2024 10:29:46 +1000
Subject: Use one MMU page per leveldb write buffer

Also drop some of the other tuning changes, since they don't seem to
impact much.
---
 src/tangara/database/database.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'src/tangara/database/database.cpp')

diff --git a/src/tangara/database/database.cpp b/src/tangara/database/database.cpp
index 85700431..e3f3df67 100644
--- a/src/tangara/database/database.cpp
+++ b/src/tangara/database/database.cpp
@@ -144,10 +144,10 @@ auto Database::Open(IFileGatherer& gatherer,
 
             leveldb::Options options;
             options.env = sEnv.env();
-            options.write_buffer_size = 4 * 1024;
-            options.max_file_size = 16 * 1024;
+            // Match the write buffer size to the MMU page size in order to
+            // make most efficient use of PSRAM mapping.
+            options.write_buffer_size = CONFIG_MMU_PAGE_SIZE;
             options.block_cache = cache.get();
-            options.block_size = 2048;
 
             auto status = leveldb::DB::Open(options, kDbPath, &db);
             if (!status.ok()) {
-- 
cgit v1.2.3


From b5dc53670a259c3fdf2d3f20f52880f2218221d7 Mon Sep 17 00:00:00 2001
From: jacqueline <me@jacqueline.id.au>
Date: Thu, 8 Aug 2024 12:30:49 +1000
Subject: Derive the next track id from stored track data, instead of tracking
 it explicitly

This saves about 1ms per new track right now, but more importantly means
that minting a new track id is now a single atomic operation, rather
than being its own database write. This is a useful property that will
come in handy in a few commits' time.
---
 src/tangara/database/database.cpp | 56 ++++++++++++++++++++++++++++-----------
 1 file changed, 40 insertions(+), 16 deletions(-)

(limited to 'src/tangara/database/database.cpp')

diff --git a/src/tangara/database/database.cpp b/src/tangara/database/database.cpp
index e3f3df67..c543b941 100644
--- a/src/tangara/database/database.cpp
+++ b/src/tangara/database/database.cpp
@@ -24,6 +24,7 @@
 #include "cppbor.h"
 #include "cppbor_parse.h"
 #include "database/index.hpp"
+#include "debug.hpp"
 #include "esp_log.h"
 #include "esp_timer.h"
 #include "ff.h"
@@ -60,7 +61,6 @@ static const char kKeyDbVersion[] = "schema_version";
 
 static const char kKeyCustom[] = "U\0";
 static const char kKeyCollator[] = "collator";
-static const char kKeyTrackId[] = "next_track_id";
 
 static std::atomic<bool> sIsDbOpen(false);
 
@@ -190,7 +190,10 @@ Database::Database(leveldb::DB* db,
       file_gatherer_(file_gatherer),
       tag_parser_(tag_parser),
       collator_(collator),
-      is_updating_(false) {}
+      is_updating_(false) {
+  dbCalculateNextTrackId();
+  ESP_LOGI(kTag, "next track id is %lu", next_track_id_.load());
+}
 
 Database::~Database() {
   // Delete db_ first so that any outstanding background work finishes before
@@ -492,24 +495,45 @@ auto Database::isUpdating() -> bool {
   return is_updating_;
 }
 
-auto Database::dbMintNewTrackId() -> TrackId {
-  TrackId next_id = 1;
-  std::string val;
-  auto status = db_->Get(leveldb::ReadOptions(), kKeyTrackId, &val);
-  if (status.ok()) {
-    next_id = BytesToTrackId(val).value_or(next_id);
-  } else if (!status.IsNotFound()) {
-    // TODO(jacqueline): Handle this more.
-    ESP_LOGE(kTag, "failed to get next track id");
+auto Database::dbCalculateNextTrackId() -> void {
+  std::unique_ptr<leveldb::Iterator> it{
+      db_->NewIterator(leveldb::ReadOptions())};
+
+  // Track data entries are of the format 'D/trackid', where track ids are
+  // encoded as big-endian cbor types. They can therefore be compared through
+  // byte ordering, which means we can determine what the next id should be by
+  // looking at the largest track data record in the database.
+  std::string prefix = EncodeDataPrefix();
+  std::string prefixPlusOne = prefix;
+  prefixPlusOne[prefixPlusOne.size() - 1]++;
+
+  // Seek to just past the track data section.
+  it->Seek(prefixPlusOne);
+  if (!it->Valid()) {
+    next_track_id_ = 1;
+    return;
   }
 
-  if (!db_->Put(leveldb::WriteOptions(), kKeyTrackId,
-                TrackIdToBytes(next_id + 1))
-           .ok()) {
-    ESP_LOGE(kTag, "failed to write next track id");
+  // Go back to the last track data record.
+  it->Prev();
+  if (!it->Valid() || !it->key().starts_with(prefix)) {
+    next_track_id_ = 1;
+    return;
   }
 
-  return next_id;
+  // Parse the track id back out of the key.
+  std::span<const char> key{it->key().data(), it->key().size()};
+  auto id_part = key.subspan(prefix.size());
+  if (id_part.empty()) {
+    next_track_id_ = 1;
+    return;
+  }
+
+  next_track_id_ = BytesToTrackId(id_part).value_or(0) + 1;
+}
+
+auto Database::dbMintNewTrackId() -> TrackId {
+  return next_track_id_++;
 }
 
 auto Database::dbEntomb(TrackId id, uint64_t hash) -> void {
-- 
cgit v1.2.3


From 30aaefca64445efa421edb93403036d59382920f Mon Sep 17 00:00:00 2001
From: jacqueline <me@jacqueline.id.au>
Date: Thu, 8 Aug 2024 14:35:53 +1000
Subject: Batch up the db operations associated with adding new tracks

This is ostensibly yet another 'prepare for multithreaded updates'
commit, however it does actually save us another 60(!!) odd milliseconds
per track.
---
 src/tangara/database/database.cpp | 183 ++++++++++++++++----------------------
 1 file changed, 75 insertions(+), 108 deletions(-)

(limited to 'src/tangara/database/database.cpp')

diff --git a/src/tangara/database/database.cpp b/src/tangara/database/database.cpp
index c543b941..aec661d9 100644
--- a/src/tangara/database/database.cpp
+++ b/src/tangara/database/database.cpp
@@ -352,11 +352,19 @@ auto Database::updateIndexes() -> void {
         // We couldn't read the tags for this track. Either they were
         // malformed, or perhaps the file is missing. Either way, tombstone
         // this record.
-        ESP_LOGW(kTag, "entombing missing #%lx", track->id);
+        ESP_LOGI(kTag, "entombing missing #%lx", track->id);
+
+        // Remove the indexes first, so that interrupted operations don't leave
+        // dangling index records.
         dbRemoveIndexes(track);
+
+        // Do the rest of the tombstoning as one atomic write.
+        leveldb::WriteBatch batch;
         track->is_tombstoned = true;
-        dbPutTrackData(*track);
-        db_->Delete(leveldb::WriteOptions{}, EncodePathKey(track->filepath));
+        batch.Put(EncodeDataKey(track->id), EncodeDataValue(*track));
+        batch.Delete(EncodePathKey(track->filepath));
+
+        db_->Write(leveldb::WriteOptions(), &batch);
         continue;
       }
 
@@ -370,12 +378,20 @@ auto Database::updateIndexes() -> void {
         // database.
         ESP_LOGI(kTag, "updating hash (%llx -> %llx)", track->tags_hash,
                  new_hash);
+
+        // Again, we remove the old index records first so as to avoid
+        // dangling references.
         dbRemoveIndexes(track);
 
+        // Atomically correct the hash + create the new index records.
+        leveldb::WriteBatch batch;
         track->tags_hash = new_hash;
-        dbIngestTagHashes(*tags, track->individual_tag_hashes);
-        dbPutTrackData(*track);
-        dbPutHash(new_hash, track->id);
+        dbIngestTagHashes(*tags, track->individual_tag_hashes, batch);
+
+        dbCreateIndexesForTrack(*track, *tags, batch);
+        batch.Put(EncodeDataKey(track->id), EncodeDataValue(*track));
+        batch.Put(EncodeHashKey(new_hash), EncodeHashValue(track->id));
+        db_->Write(leveldb::WriteOptions(), &batch);
       }
     }
   }
@@ -404,72 +420,56 @@ auto Database::updateIndexes() -> void {
       return;
     }
 
-    // Check for any existing record with the same hash.
+    // Check for any existing track with the same hash.
     uint64_t hash = tags->Hash();
-    std::string key = EncodeHashKey(hash);
-    std::optional<TrackId> existing_hash;
+    std::optional<TrackId> existing_id;
     std::string raw_entry;
-    if (db_->Get(leveldb::ReadOptions(), key, &raw_entry).ok()) {
-      existing_hash = ParseHashValue(raw_entry);
+    if (db_->Get(leveldb::ReadOptions(), EncodeHashKey(hash), &raw_entry)
+            .ok()) {
+      existing_id = ParseHashValue(raw_entry);
     }
 
-    std::pair<uint16_t, uint16_t> modified{info.fdate, info.ftime};
-    if (!existing_hash) {
-      // We've never met this track before! Or we have, but the entry is
-      // malformed. Either way, record this as a new track.
-      TrackId id = dbMintNewTrackId();
-      ESP_LOGD(kTag, "recording new 0x%lx", id);
+    std::shared_ptr<TrackData> data;
+    if (existing_id) {
+      // Do we have any existing data for this track? This could be the case if
+      // this is a tombstoned entry. In such a case, we want to reuse the
+      // previous TrackData so that any extra metadata is preserved.
+      data = dbGetTrackData(*existing_id);
+      if (!data) {
+        data = std::make_shared<TrackData>();
+        data->id = *existing_id;
+      } else if (data->filepath != path) {
+        ESP_LOGW(kTag, "hash collision: %s, %s, %s",
+                 tags->title().value_or("no title").c_str(),
+                 tags->artist().value_or("no artist").c_str(),
+                 tags->album().value_or("no album").c_str());
+        // Don't commit anything if there's a hash collision, since we're
+        // likely to make a big mess.
+        return;
+      }
+    } else {
       num_new_tracks++;
-
-      auto data = std::make_shared<TrackData>();
-      data->id = id;
-      data->filepath = path;
-      data->tags_hash = hash;
-      data->modified_at = modified;
-      dbIngestTagHashes(*tags, data->individual_tag_hashes);
-
-      dbPutTrackData(*data);
-      dbPutHash(hash, id);
-      auto t = std::make_shared<Track>(data, tags);
-      dbCreateIndexesForTrack(*t);
-      db_->Put(leveldb::WriteOptions{}, EncodePathKey(path),
-               TrackIdToBytes(id));
-      return;
+      data = std::make_shared<TrackData>();
+      data->id = dbMintNewTrackId();
     }
 
-    std::shared_ptr<TrackData> existing_data = dbGetTrackData(*existing_hash);
-    if (!existing_data) {
-      // We found a hash that matches, but there's no data record? Weird.
-      auto new_data = std::make_shared<TrackData>();
-      new_data->id = dbMintNewTrackId();
-      new_data->filepath = path;
-      new_data->tags_hash = hash;
-      new_data->modified_at = modified;
-      dbIngestTagHashes(*tags, new_data->individual_tag_hashes);
-      dbPutTrackData(*new_data);
-      auto t = std::make_shared<Track>(new_data, tags);
-      dbCreateIndexesForTrack(*t);
-      db_->Put(leveldb::WriteOptions{}, EncodePathKey(path),
-               TrackIdToBytes(new_data->id));
-      return;
-    }
+    // Make sure the file-based metadata on the TrackData is up to date.
+    data->filepath = path;
+    data->tags_hash = hash;
+    data->modified_at = {info.fdate, info.ftime};
 
-    if (existing_data->is_tombstoned) {
-      ESP_LOGI(kTag, "exhuming track %lu", existing_data->id);
-      existing_data->is_tombstoned = false;
-      existing_data->modified_at = modified;
-      dbPutTrackData(*existing_data);
-      auto t = std::make_shared<Track>(existing_data, tags);
-      dbCreateIndexesForTrack(*t);
-      db_->Put(leveldb::WriteOptions{}, EncodePathKey(path),
-               TrackIdToBytes(existing_data->id));
-    } else if (existing_data->filepath !=
-               std::pmr::string{path.data(), path.size()}) {
-      ESP_LOGW(kTag, "hash collision: %s, %s, %s",
-               tags->title().value_or("no title").c_str(),
-               tags->artist().value_or("no artist").c_str(),
-               tags->album().value_or("no album").c_str());
-    }
+    // Apply all the actual database changes as one atomic batch. This makes
+    // the whole 'new track' operation atomic, and also reduces the amount of
+    // lock contention when adding many tracks at once.
+    leveldb::WriteBatch batch;
+    dbIngestTagHashes(*tags, data->individual_tag_hashes, batch);
+
+    dbCreateIndexesForTrack(*data, *tags, batch);
+    batch.Put(EncodeDataKey(data->id), EncodeDataValue(*data));
+    batch.Put(EncodeHashKey(data->tags_hash), EncodeHashValue(data->id));
+    batch.Put(EncodePathKey(path), TrackIdToBytes(data->id));
+
+    db_->Write(leveldb::WriteOptions(), &batch);
   });
 
   uint64_t end_time = esp_timer_get_time();
@@ -536,22 +536,6 @@ auto Database::dbMintNewTrackId() -> TrackId {
   return next_track_id_++;
 }
 
-auto Database::dbEntomb(TrackId id, uint64_t hash) -> void {
-  std::string key = EncodeHashKey(hash);
-  std::string val = EncodeHashValue(id);
-  if (!db_->Put(leveldb::WriteOptions(), key, val).ok()) {
-    ESP_LOGE(kTag, "failed to entomb #%llx (id #%lx)", hash, id);
-  }
-}
-
-auto Database::dbPutTrackData(const TrackData& s) -> void {
-  std::string key = EncodeDataKey(s.id);
-  std::string val = EncodeDataValue(s);
-  if (!db_->Put(leveldb::WriteOptions(), key, val).ok()) {
-    ESP_LOGE(kTag, "failed to write data for #%lx", s.id);
-  }
-}
-
 auto Database::dbGetTrackData(TrackId id) -> std::shared_ptr<TrackData> {
   std::string key = EncodeDataKey(id);
   std::string raw_val;
@@ -562,33 +546,19 @@ auto Database::dbGetTrackData(TrackId id) -> std::shared_ptr<TrackData> {
   return ParseDataValue(raw_val);
 }
 
-auto Database::dbPutHash(const uint64_t& hash, TrackId i) -> void {
-  std::string key = EncodeHashKey(hash);
-  std::string val = EncodeHashValue(i);
-  if (!db_->Put(leveldb::WriteOptions(), key, val).ok()) {
-    ESP_LOGE(kTag, "failed to write hash for #%lx", i);
-  }
-}
-
-auto Database::dbGetHash(const uint64_t& hash) -> std::optional<TrackId> {
-  std::string key = EncodeHashKey(hash);
-  std::string raw_val;
-  if (!db_->Get(leveldb::ReadOptions(), key, &raw_val).ok()) {
-    ESP_LOGW(kTag, "no key found for hash #%llx", hash);
-    return {};
-  }
-  return ParseHashValue(raw_val);
+auto Database::dbCreateIndexesForTrack(const Track& track,
+                                       leveldb::WriteBatch& batch) -> void {
+  dbCreateIndexesForTrack(track.data(), track.tags(), batch);
 }
 
-auto Database::dbCreateIndexesForTrack(const Track& track) -> void {
+auto Database::dbCreateIndexesForTrack(const TrackData& data,
+                                       const TrackTags& tags,
+                                       leveldb::WriteBatch& batch) -> void {
   for (const IndexInfo& index : getIndexes()) {
-    leveldb::WriteBatch writes;
-    auto entries = Index(collator_, index, track);
+    auto entries = Index(collator_, index, data, tags);
     for (const auto& it : entries) {
-      writes.Put(EncodeIndexKey(it.first),
-                 {it.second.data(), it.second.size()});
+      batch.Put(EncodeIndexKey(it.first), {it.second.data(), it.second.size()});
     }
-    db_->Write(leveldb::WriteOptions(), &writes);
   }
 }
 
@@ -597,9 +567,8 @@ auto Database::dbRemoveIndexes(std::shared_ptr<TrackData> data) -> void {
   if (!tags) {
     return;
   }
-  Track track{data, tags};
   for (const IndexInfo& index : getIndexes()) {
-    auto entries = Index(collator_, index, track);
+    auto entries = Index(collator_, index, *data, *tags);
     for (auto it = entries.rbegin(); it != entries.rend(); it++) {
       auto key = EncodeIndexKey(it->first);
       auto status = db_->Delete(leveldb::WriteOptions{}, key);
@@ -626,16 +595,14 @@ auto Database::dbRemoveIndexes(std::shared_ptr<TrackData> data) -> void {
 }
 
 auto Database::dbIngestTagHashes(const TrackTags& tags,
-                                 std::pmr::unordered_map<Tag, uint64_t>& out)
-    -> void {
-  leveldb::WriteBatch batch{};
+                                 std::pmr::unordered_map<Tag, uint64_t>& out,
+                                 leveldb::WriteBatch& batch) -> void {
   for (const auto& tag : tags.allPresent()) {
     auto val = tags.get(tag);
     auto hash = tagHash(val);
     batch.Put(EncodeTagHashKey(hash), tagToString(val));
     out[tag] = hash;
   }
-  db_->Write(leveldb::WriteOptions{}, &batch);
 }
 
 auto Database::dbRecoverTagsFromHashes(
-- 
cgit v1.2.3


From 28cf749951a8f811606bb233efecfd36738c3c89 Mon Sep 17 00:00:00 2001
From: jacqueline <me@jacqueline.id.au>
Date: Thu, 8 Aug 2024 16:08:46 +1000
Subject: Make FileGatherer shaped more like a normal iterator

---
 src/tangara/database/database.cpp | 34 +++++++++++++++++-----------------
 1 file changed, 17 insertions(+), 17 deletions(-)

(limited to 'src/tangara/database/database.cpp')

diff --git a/src/tangara/database/database.cpp b/src/tangara/database/database.cpp
index aec661d9..2d72fe95 100644
--- a/src/tangara/database/database.cpp
+++ b/src/tangara/database/database.cpp
@@ -24,6 +24,7 @@
 #include "cppbor.h"
 #include "cppbor_parse.h"
 #include "database/index.hpp"
+#include "database/track_finder.hpp"
 #include "debug.hpp"
 #include "esp_log.h"
 #include "esp_timer.h"
@@ -40,7 +41,6 @@
 
 #include "database/db_events.hpp"
 #include "database/env_esp.hpp"
-#include "database/file_gatherer.hpp"
 #include "database/records.hpp"
 #include "database/tag_parser.hpp"
 #include "database/track.hpp"
@@ -122,8 +122,7 @@ static auto CheckDatabase(leveldb::DB& db, locale::ICollator& col) -> bool {
   return true;
 }
 
-auto Database::Open(IFileGatherer& gatherer,
-                    ITagParser& parser,
+auto Database::Open(ITagParser& parser,
                     locale::ICollator& collator,
                     tasks::WorkerPool& bg_worker)
     -> cpp::result<Database*, DatabaseError> {
@@ -168,8 +167,7 @@ auto Database::Open(IFileGatherer& gatherer,
             }
 
             ESP_LOGI(kTag, "Database opened successfully");
-            return new Database(db, cache.release(), gatherer, parser,
-                                collator);
+            return new Database(db, cache.release(), parser, collator);
           })
       .get();
 }
@@ -182,12 +180,10 @@ auto Database::Destroy() -> void {
 
 Database::Database(leveldb::DB* db,
                    leveldb::Cache* cache,
-                   IFileGatherer& file_gatherer,
                    ITagParser& tag_parser,
                    locale::ICollator& collator)
     : db_(db),
       cache_(cache),
-      file_gatherer_(file_gatherer),
       tag_parser_(tag_parser),
       collator_(collator),
       is_updating_(false) {
@@ -401,7 +397,11 @@ auto Database::updateIndexes() -> void {
   // Stage 2: search for newly added files.
   ESP_LOGI(kTag, "scanning for new tracks");
   uint64_t num_files = 0;
-  file_gatherer_.FindFiles("", [&](std::string_view path, const FILINFO& info) {
+
+  auto track_finder = std::make_shared<TrackFinder>("");
+
+  FILINFO info;
+  while (auto path = track_finder->next(info)) {
     num_files++;
     events::Ui().Dispatch(event::UpdateProgress{
         .stage = event::UpdateProgress::Stage::kScanningForNewTracks,
@@ -409,15 +409,15 @@ auto Database::updateIndexes() -> void {
     });
 
     std::string unused;
-    if (db_->Get(read_options, EncodePathKey(path), &unused).ok()) {
+    if (db_->Get(read_options, EncodePathKey(*path), &unused).ok()) {
       // This file is already in the database; skip it.
-      return;
+      continue;
     }
 
-    std::shared_ptr<TrackTags> tags = tag_parser_.ReadAndParseTags(path);
+    std::shared_ptr<TrackTags> tags = tag_parser_.ReadAndParseTags(*path);
     if (!tags || tags->encoding() == Container::kUnsupported) {
       // No parseable tags; skip this fiile.
-      return;
+      continue;
     }
 
     // Check for any existing track with the same hash.
@@ -438,14 +438,14 @@ auto Database::updateIndexes() -> void {
       if (!data) {
         data = std::make_shared<TrackData>();
         data->id = *existing_id;
-      } else if (data->filepath != path) {
+      } else if (std::string_view{data->filepath} != *path) {
         ESP_LOGW(kTag, "hash collision: %s, %s, %s",
                  tags->title().value_or("no title").c_str(),
                  tags->artist().value_or("no artist").c_str(),
                  tags->album().value_or("no album").c_str());
         // Don't commit anything if there's a hash collision, since we're
         // likely to make a big mess.
-        return;
+        continue;
       }
     } else {
       num_new_tracks++;
@@ -454,7 +454,7 @@ auto Database::updateIndexes() -> void {
     }
 
     // Make sure the file-based metadata on the TrackData is up to date.
-    data->filepath = path;
+    data->filepath = *path;
     data->tags_hash = hash;
     data->modified_at = {info.fdate, info.ftime};
 
@@ -467,10 +467,10 @@ auto Database::updateIndexes() -> void {
     dbCreateIndexesForTrack(*data, *tags, batch);
     batch.Put(EncodeDataKey(data->id), EncodeDataValue(*data));
     batch.Put(EncodeHashKey(data->tags_hash), EncodeHashValue(data->id));
-    batch.Put(EncodePathKey(path), TrackIdToBytes(data->id));
+    batch.Put(EncodePathKey(*path), TrackIdToBytes(data->id));
 
     db_->Write(leveldb::WriteOptions(), &batch);
-  });
+  };
 
   uint64_t end_time = esp_timer_get_time();
 
-- 
cgit v1.2.3


From 2ad83cb2108dc55c9eb0573b0645513a1e8a61f5 Mon Sep 17 00:00:00 2001
From: jacqueline <me@jacqueline.id.au>
Date: Fri, 9 Aug 2024 11:43:48 +1000
Subject: Shard searching for new tracks across multiple tasks

This also has the effect of breaking up the enormous 'updateIndexes'
method into one call per file, which means database updates also no
longer monopolise a single background task for their entire duration.

avg. time per new file is now <140ms for a completely fresh database,
which is pretty good i think!
---
 src/tangara/database/database.cpp | 253 ++++++++++++++++++++------------------
 1 file changed, 132 insertions(+), 121 deletions(-)

(limited to 'src/tangara/database/database.cpp')

diff --git a/src/tangara/database/database.cpp b/src/tangara/database/database.cpp
index 2d72fe95..491ad8b7 100644
--- a/src/tangara/database/database.cpp
+++ b/src/tangara/database/database.cpp
@@ -6,9 +6,6 @@
 
 #include "database/database.hpp"
 
-#include <stdint.h>
-#include <sys/_stdint.h>
-
 #include <algorithm>
 #include <cstdint>
 #include <functional>
@@ -20,12 +17,8 @@
 #include <string>
 #include <variant>
 
-#include "collation.hpp"
 #include "cppbor.h"
 #include "cppbor_parse.h"
-#include "database/index.hpp"
-#include "database/track_finder.hpp"
-#include "debug.hpp"
 #include "esp_log.h"
 #include "esp_timer.h"
 #include "ff.h"
@@ -39,12 +32,14 @@
 #include "leveldb/status.h"
 #include "leveldb/write_batch.h"
 
+#include "collation.hpp"
 #include "database/db_events.hpp"
 #include "database/env_esp.hpp"
+#include "database/index.hpp"
 #include "database/records.hpp"
 #include "database/tag_parser.hpp"
 #include "database/track.hpp"
-#include "drivers/spi.hpp"
+#include "database/track_finder.hpp"
 #include "events/event_queue.hpp"
 #include "memory_resource.hpp"
 #include "result.hpp"
@@ -58,12 +53,16 @@ static SingletonEnv<leveldb::EspEnv> sEnv;
 static const char kDbPath[] = "/.tangara-db";
 
 static const char kKeyDbVersion[] = "schema_version";
-
 static const char kKeyCustom[] = "U\0";
 static const char kKeyCollator[] = "collator";
 
+static constexpr size_t kMaxParallelism = 2;
+
 static std::atomic<bool> sIsDbOpen(false);
 
+using std::placeholders::_1;
+using std::placeholders::_2;
+
 static auto CreateNewDatabase(leveldb::Options& options, locale::ICollator& col)
     -> leveldb::DB* {
   Database::Destroy();
@@ -167,7 +166,8 @@ auto Database::Open(ITagParser& parser,
             }
 
             ESP_LOGI(kTag, "Database opened successfully");
-            return new Database(db, cache.release(), parser, collator);
+            return new Database(db, cache.release(), bg_worker, parser,
+                                collator);
           })
       .get();
 }
@@ -180,15 +180,20 @@ auto Database::Destroy() -> void {
 
 Database::Database(leveldb::DB* db,
                    leveldb::Cache* cache,
+                   tasks::WorkerPool& pool,
                    ITagParser& tag_parser,
                    locale::ICollator& collator)
     : db_(db),
       cache_(cache),
+      track_finder_(
+          pool,
+          kMaxParallelism,
+          std::bind(&Database::processCandidateCallback, this, _1, _2),
+          std::bind(&Database::indexingCompleteCallback, this)),
       tag_parser_(tag_parser),
       collator_(collator),
       is_updating_(false) {
   dbCalculateNextTrackId();
-  ESP_LOGI(kTag, "next track id is %lu", next_track_id_.load());
 }
 
 Database::~Database() {
@@ -243,7 +248,7 @@ auto Database::get(const std::string& key) -> std::optional<std::string> {
 }
 
 auto Database::getTrackPath(TrackId id) -> std::optional<std::string> {
-  auto track_data = dbGetTrackData(id);
+  auto track_data = dbGetTrackData(leveldb::ReadOptions(), id);
   if (!track_data) {
     return {};
   }
@@ -251,7 +256,7 @@ auto Database::getTrackPath(TrackId id) -> std::optional<std::string> {
 }
 
 auto Database::getTrack(TrackId id) -> std::shared_ptr<Track> {
-  std::shared_ptr<TrackData> data = dbGetTrackData(id);
+  std::shared_ptr<TrackData> data = dbGetTrackData(leveldb::ReadOptions(), id);
   if (!data || data->is_tombstoned) {
     return {};
   }
@@ -274,34 +279,61 @@ auto Database::getIndexes() -> std::vector<IndexInfo> {
   };
 }
 
-class UpdateNotifier {
- public:
-  UpdateNotifier(std::atomic<bool>& is_updating) : is_updating_(is_updating) {
-    events::Ui().Dispatch(event::UpdateStarted{});
-    events::System().Dispatch(event::UpdateStarted{});
+Database::UpdateTracker::UpdateTracker()
+    : num_old_tracks_(0),
+      num_new_tracks_(0),
+      start_time_(esp_timer_get_time()) {
+  events::Ui().Dispatch(event::UpdateStarted{});
+  events::System().Dispatch(event::UpdateStarted{});
+}
+
+Database::UpdateTracker::~UpdateTracker() {
+  uint64_t end_time = esp_timer_get_time();
+
+  uint64_t time_per_old = 0;
+  if (num_old_tracks_) {
+    time_per_old = (verification_finish_time_ - start_time_) / num_old_tracks_;
   }
-  ~UpdateNotifier() {
-    is_updating_ = false;
-    events::Ui().Dispatch(event::UpdateFinished{});
-    events::System().Dispatch(event::UpdateFinished{});
+  uint64_t time_per_new = 0;
+  if (num_new_tracks_) {
+    time_per_new = (end_time - verification_finish_time_) / num_new_tracks_;
   }
 
- private:
-  std::atomic<bool>& is_updating_;
-};
+  ESP_LOGI(
+      kTag,
+      "processed %lu old tracks and %lu new tracks in %llu seconds (%llums "
+      "per old, %llums per new)",
+      num_old_tracks_, num_new_tracks_, (end_time - start_time_) / 1000000,
+      time_per_old / 1000, time_per_new / 1000);
+
+  events::Ui().Dispatch(event::UpdateFinished{});
+  events::System().Dispatch(event::UpdateFinished{});
+}
+
+auto Database::UpdateTracker::onTrackVerified() -> void {
+  events::Ui().Dispatch(event::UpdateProgress{
+      .stage = event::UpdateProgress::Stage::kVerifyingExistingTracks,
+      .val = ++num_old_tracks_,
+  });
+}
+
+auto Database::UpdateTracker::onVerificationFinished() -> void {
+  verification_finish_time_ = esp_timer_get_time();
+}
+
+auto Database::UpdateTracker::onTrackAdded() -> void {
+  num_new_tracks_++;
+}
 
 auto Database::updateIndexes() -> void {
   if (is_updating_.exchange(true)) {
     return;
   }
-  UpdateNotifier notifier{is_updating_};
-
-  uint32_t num_old_tracks = 0;
-  uint32_t num_new_tracks = 0;
-  uint64_t start_time = esp_timer_get_time();
+  update_tracker_ = std::make_unique<UpdateTracker>();
 
   leveldb::ReadOptions read_options;
-  read_options.fill_cache = true;
+  read_options.fill_cache = false;
+  read_options.verify_checksums = true;
 
   // Stage 1: verify all existing tracks are still valid.
   ESP_LOGI(kTag, "verifying existing tracks");
@@ -310,11 +342,7 @@ auto Database::updateIndexes() -> void {
     std::string prefix = EncodeDataPrefix();
     for (it->Seek(prefix); it->Valid() && it->key().starts_with(prefix);
          it->Next()) {
-      num_old_tracks++;
-      events::Ui().Dispatch(event::UpdateProgress{
-          .stage = event::UpdateProgress::Stage::kVerifyingExistingTracks,
-          .val = num_old_tracks,
-      });
+      update_tracker_->onTrackVerified();
 
       std::shared_ptr<TrackData> track = ParseDataValue(it->value());
       if (!track) {
@@ -325,7 +353,6 @@ auto Database::updateIndexes() -> void {
       }
 
       if (track->is_tombstoned) {
-        ESP_LOGW(kTag, "skipping tombstoned %lx", track->id);
         continue;
       }
 
@@ -392,103 +419,86 @@ auto Database::updateIndexes() -> void {
     }
   }
 
-  uint64_t verify_end_time = esp_timer_get_time();
+  update_tracker_->onVerificationFinished();
 
   // Stage 2: search for newly added files.
   ESP_LOGI(kTag, "scanning for new tracks");
-  uint64_t num_files = 0;
-
-  auto track_finder = std::make_shared<TrackFinder>("");
+  track_finder_.launch("");
+};
 
-  FILINFO info;
-  while (auto path = track_finder->next(info)) {
-    num_files++;
-    events::Ui().Dispatch(event::UpdateProgress{
-        .stage = event::UpdateProgress::Stage::kScanningForNewTracks,
-        .val = num_files,
-    });
+auto Database::processCandidateCallback(FILINFO& info, std::string_view path)
+    -> void {
+  leveldb::ReadOptions read_options;
+  read_options.fill_cache = true;
+  read_options.verify_checksums = false;
 
-    std::string unused;
-    if (db_->Get(read_options, EncodePathKey(*path), &unused).ok()) {
-      // This file is already in the database; skip it.
-      continue;
-    }
+  std::string unused;
+  if (db_->Get(read_options, EncodePathKey(path), &unused).ok()) {
+    // This file is already in the database; skip it.
+    return;
+  }
 
-    std::shared_ptr<TrackTags> tags = tag_parser_.ReadAndParseTags(*path);
-    if (!tags || tags->encoding() == Container::kUnsupported) {
-      // No parseable tags; skip this fiile.
-      continue;
-    }
+  std::shared_ptr<TrackTags> tags = tag_parser_.ReadAndParseTags(path);
+  if (!tags || tags->encoding() == Container::kUnsupported) {
+  // No parseable tags; skip this file.
+    return;
+  }
 
-    // Check for any existing track with the same hash.
-    uint64_t hash = tags->Hash();
-    std::optional<TrackId> existing_id;
-    std::string raw_entry;
-    if (db_->Get(leveldb::ReadOptions(), EncodeHashKey(hash), &raw_entry)
-            .ok()) {
-      existing_id = ParseHashValue(raw_entry);
-    }
+  // Check for any existing track with the same hash.
+  uint64_t hash = tags->Hash();
+  std::optional<TrackId> existing_id;
+  std::string raw_entry;
+  if (db_->Get(read_options, EncodeHashKey(hash), &raw_entry).ok()) {
+    existing_id = ParseHashValue(raw_entry);
+  }
 
-    std::shared_ptr<TrackData> data;
-    if (existing_id) {
-      // Do we have any existing data for this track? This could be the case if
-      // this is a tombstoned entry. In such as case, we want to reuse the
-      // previous TrackData so that any extra metadata is preserved.
-      data = dbGetTrackData(*existing_id);
-      if (!data) {
-        data = std::make_shared<TrackData>();
-        data->id = *existing_id;
-      } else if (std::string_view{data->filepath} != *path) {
-        ESP_LOGW(kTag, "hash collision: %s, %s, %s",
-                 tags->title().value_or("no title").c_str(),
-                 tags->artist().value_or("no artist").c_str(),
-                 tags->album().value_or("no album").c_str());
-        // Don't commit anything if there's a hash collision, since we're
-        // likely to make a big mess.
-        continue;
-      }
-    } else {
-      num_new_tracks++;
+  std::shared_ptr<TrackData> data;
+  if (existing_id) {
+    // Do we have any existing data for this track? This could be the case if
+    // this is a tombstoned entry. In such a case, we want to reuse the
+    // previous TrackData so that any extra metadata is preserved.
+    data = dbGetTrackData(read_options, *existing_id);
+    if (!data) {
       data = std::make_shared<TrackData>();
-      data->id = dbMintNewTrackId();
+      data->id = *existing_id;
+    } else if (data->filepath != path && !data->is_tombstoned) {
+      ESP_LOGW(kTag, "hash collision: %s, %s, %s",
+               tags->title().value_or("no title").c_str(),
+               tags->artist().value_or("no artist").c_str(),
+               tags->album().value_or("no album").c_str());
+      // Don't commit anything if there's a hash collision, since we're
+      // likely to make a big mess.
+      return;
     }
+  } else {
+    update_tracker_->onTrackAdded();
+    data = std::make_shared<TrackData>();
+    data->id = dbMintNewTrackId();
+  }
 
-    // Make sure the file-based metadata on the TrackData is up to date.
-    data->filepath = *path;
-    data->tags_hash = hash;
-    data->modified_at = {info.fdate, info.ftime};
-
-    // Apply all the actual database changes as one atomic batch. This makes
-    // the whole 'new track' operation atomic, and also reduces the amount of
-    // lock contention when adding many tracks at once.
-    leveldb::WriteBatch batch;
-    dbIngestTagHashes(*tags, data->individual_tag_hashes, batch);
-
-    dbCreateIndexesForTrack(*data, *tags, batch);
-    batch.Put(EncodeDataKey(data->id), EncodeDataValue(*data));
-    batch.Put(EncodeHashKey(data->tags_hash), EncodeHashValue(data->id));
-    batch.Put(EncodePathKey(*path), TrackIdToBytes(data->id));
+  // Make sure the file-based metadata on the TrackData is up to date.
+  data->filepath = path;
+  data->tags_hash = hash;
+  data->modified_at = {info.fdate, info.ftime};
+  data->is_tombstoned = false;
 
-    db_->Write(leveldb::WriteOptions(), &batch);
-  };
+  // Apply all the actual database changes as one atomic batch. This makes
+  // the whole 'new track' operation atomic, and also reduces the amount of
+  // lock contention when adding many tracks at once.
+  leveldb::WriteBatch batch;
+  dbIngestTagHashes(*tags, data->individual_tag_hashes, batch);
 
-  uint64_t end_time = esp_timer_get_time();
+  dbCreateIndexesForTrack(*data, *tags, batch);
+  batch.Put(EncodeDataKey(data->id), EncodeDataValue(*data));
+  batch.Put(EncodeHashKey(data->tags_hash), EncodeHashValue(data->id));
+  batch.Put(EncodePathKey(path), TrackIdToBytes(data->id));
 
-  uint64_t time_per_old = 0;
-  if (num_old_tracks) {
-    time_per_old = (verify_end_time - start_time) / num_old_tracks;
-  }
-  uint64_t time_per_new = 0;
-  if (num_new_tracks) {
-    time_per_new = (end_time - verify_end_time) / num_new_tracks;
-  }
+  db_->Write(leveldb::WriteOptions(), &batch);
+}
 
-  ESP_LOGI(
-      kTag,
-      "processed %lu old tracks and %lu new tracks in %llu seconds (%llums "
-      "per old, %llums per new)",
-      num_old_tracks, num_new_tracks, (end_time - start_time) / 1000000,
-      time_per_old / 1000, time_per_new / 1000);
+auto Database::indexingCompleteCallback() -> void {
+  update_tracker_.reset();
+  is_updating_ = false;
 }
 
 auto Database::isUpdating() -> bool {
@@ -536,10 +546,11 @@ auto Database::dbMintNewTrackId() -> TrackId {
   return next_track_id_++;
 }
 
-auto Database::dbGetTrackData(TrackId id) -> std::shared_ptr<TrackData> {
+auto Database::dbGetTrackData(leveldb::ReadOptions options, TrackId id)
+    -> std::shared_ptr<TrackData> {
   std::string key = EncodeDataKey(id);
   std::string raw_val;
-  if (!db_->Get(leveldb::ReadOptions(), key, &raw_val).ok()) {
+  if (!db_->Get(options, key, &raw_val).ok()) {
     ESP_LOGW(kTag, "no key found for #%lx", id);
     return {};
   }
-- 
cgit v1.2.3


From dacf3efc45677343479b4d3ff9502504b211639a Mon Sep 17 00:00:00 2001
From: jacqueline <me@jacqueline.id.au>
Date: Fri, 6 Sep 2024 14:53:01 +1000
Subject: Look for music in "/Music", with the root dir as a fallback

---
 src/tangara/database/database.cpp | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

(limited to 'src/tangara/database/database.cpp')

diff --git a/src/tangara/database/database.cpp b/src/tangara/database/database.cpp
index 491ad8b7..64451f48 100644
--- a/src/tangara/database/database.cpp
+++ b/src/tangara/database/database.cpp
@@ -51,6 +51,7 @@ static SingletonEnv<leveldb::EspEnv> sEnv;
 [[maybe_unused]] static const char* kTag = "DB";
 
 static const char kDbPath[] = "/.tangara-db";
+static const char kMusicPath[] = "Music";
 
 static const char kKeyDbVersion[] = "schema_version";
 static const char kKeyCustom[] = "U\0";
@@ -422,8 +423,14 @@ auto Database::updateIndexes() -> void {
   update_tracker_->onVerificationFinished();
 
   // Stage 2: search for newly added files.
-  ESP_LOGI(kTag, "scanning for new tracks");
-  track_finder_.launch("");
+  std::string root;
+  FF_DIR dir;
+  if (f_opendir(&dir, kMusicPath) == FR_OK) {
+    f_closedir(&dir);
+    root = kMusicPath;
+  }
+  ESP_LOGI(kTag, "scanning for new tracks in '%s'", root.c_str());
+  track_finder_.launch(root);
 };
 
 auto Database::processCandidateCallback(FILINFO& info, std::string_view path)
-- 
cgit v1.2.3