diff options
| author | jacqueline <me@jacqueline.id.au> | 2024-05-02 19:12:26 +1000 |
|---|---|---|
| committer | jacqueline <me@jacqueline.id.au> | 2024-05-02 19:12:26 +1000 |
| commit | 1573a8c4cde1cd9528b422b2dcc598e37ffe94a7 (patch) | |
| tree | d162822b8fd7054f81bace0c7a65ab4d5e6f93ef /src/tangara/database | |
| parent | a231fd1c8afedbeb14b0bc77d76bad61db986059 (diff) | |
| download | tangara-fw-1573a8c4cde1cd9528b422b2dcc598e37ffe94a7.tar.gz | |
WIP merge cyclically dependent components into one big component
Diffstat (limited to 'src/tangara/database')
| -rw-r--r-- | src/tangara/database/database.cpp | 820 | ||||
| -rw-r--r-- | src/tangara/database/database.hpp | 244 | ||||
| -rw-r--r-- | src/tangara/database/db_events.hpp | 29 | ||||
| -rw-r--r-- | src/tangara/database/env_esp.cpp | 497 | ||||
| -rw-r--r-- | src/tangara/database/env_esp.hpp | 143 | ||||
| -rw-r--r-- | src/tangara/database/file_gatherer.cpp | 80 | ||||
| -rw-r--r-- | src/tangara/database/file_gatherer.hpp | 36 | ||||
| -rw-r--r-- | src/tangara/database/future_fetcher.hpp | 62 | ||||
| -rw-r--r-- | src/tangara/database/index.cpp | 206 | ||||
| -rw-r--r-- | src/tangara/database/index.hpp | 78 | ||||
| -rw-r--r-- | src/tangara/database/records.cpp | 260 | ||||
| -rw-r--r-- | src/tangara/database/records.hpp | 85 | ||||
| -rw-r--r-- | src/tangara/database/tag_parser.cpp | 208 | ||||
| -rw-r--r-- | src/tangara/database/tag_parser.hpp | 44 | ||||
| -rw-r--r-- | src/tangara/database/test/CMakeLists.txt | 8 | ||||
| -rw-r--r-- | src/tangara/database/test/test_database.cpp | 210 | ||||
| -rw-r--r-- | src/tangara/database/test/test_records.cpp | 146 | ||||
| -rw-r--r-- | src/tangara/database/track.cpp | 307 | ||||
| -rw-r--r-- | src/tangara/database/track.hpp | 205 |
19 files changed, 3668 insertions, 0 deletions
diff --git a/src/tangara/database/database.cpp b/src/tangara/database/database.cpp new file mode 100644 index 00000000..48fb0c63 --- /dev/null +++ b/src/tangara/database/database.cpp @@ -0,0 +1,820 @@ +/* + * Copyright 2023 jacqueline <me@jacqueline.id.au> + * + * SPDX-License-Identifier: GPL-3.0-only + */ + +#include "database.hpp" + +#include <stdint.h> +#include <sys/_stdint.h> + +#include <algorithm> +#include <cstdint> +#include <functional> +#include <iomanip> +#include <iostream> +#include <memory> +#include <optional> +#include <sstream> +#include <string> +#include <variant> + +#include "collation.hpp" +#include "cppbor.h" +#include "cppbor_parse.h" +#include "esp_log.h" +#include "ff.h" +#include "freertos/projdefs.h" +#include "index.hpp" +#include "komihash.h" +#include "leveldb/cache.h" +#include "leveldb/db.h" +#include "leveldb/iterator.h" +#include "leveldb/options.h" +#include "leveldb/slice.h" +#include "leveldb/status.h" +#include "leveldb/write_batch.h" + +#include "db_events.hpp" +#include "env_esp.hpp" +#include "event_queue.hpp" +#include "file_gatherer.hpp" +#include "memory_resource.hpp" +#include "records.hpp" +#include "result.hpp" +#include "spi.hpp" +#include "tag_parser.hpp" +#include "tasks.hpp" +#include "track.hpp" + +namespace database { + +static SingletonEnv<leveldb::EspEnv> sEnv; +[[maybe_unused]] static const char* kTag = "DB"; + +static const char kDbPath[] = "/.tangara-db"; + +static const char kKeyDbVersion[] = "schema_version"; + +static const char kKeyCustom[] = "U\0"; +static const char kKeyCollator[] = "collator"; +static const char kKeyTrackId[] = "next_track_id"; + +static std::atomic<bool> sIsDbOpen(false); + +static auto CreateNewDatabase(leveldb::Options& options, locale::ICollator& col) + -> leveldb::DB* { + Database::Destroy(); + leveldb::DB* db; + options.create_if_missing = true; + auto status = leveldb::DB::Open(options, kDbPath, &db); + if (!status.ok()) { + ESP_LOGE(kTag, "failed to open db, status %s", 
status.ToString().c_str()); + return nullptr; + } + auto version_str = std::to_string(kCurrentDbVersion); + status = db->Put(leveldb::WriteOptions{}, kKeyDbVersion, version_str); + if (!status.ok()) { + delete db; + return nullptr; + } + ESP_LOGI(kTag, "opening db with collator %s", + col.Describe().value_or("NULL").c_str()); + status = db->Put(leveldb::WriteOptions{}, kKeyCollator, + col.Describe().value_or("")); + if (!status.ok()) { + delete db; + return nullptr; + } + return db; +} + +static auto CheckDatabase(leveldb::DB& db, locale::ICollator& col) -> bool { + leveldb::Status status; + + std::string raw_version; + std::optional<uint8_t> version{}; + status = db.Get(leveldb::ReadOptions{}, kKeyDbVersion, &raw_version); + if (status.ok()) { + version = std::stoi(raw_version); + } + if (!version || *version != kCurrentDbVersion) { + ESP_LOGW(kTag, "db version missing or incorrect"); + return false; + } + + std::string collator; + status = db.Get(leveldb::ReadOptions{}, kKeyCollator, &collator); + if (!status.ok()) { + ESP_LOGW(kTag, "db collator is unknown"); + return false; + } + auto needed = col.Describe(); + + if ((needed && needed.value() != collator) || + (!needed && !collator.empty())) { + ESP_LOGW(kTag, "db collator is mismatched"); + return false; + } + + return true; +} + +auto Database::Open(IFileGatherer& gatherer, + ITagParser& parser, + locale::ICollator& collator, + tasks::WorkerPool& bg_worker) + -> cpp::result<Database*, DatabaseError> { + if (sIsDbOpen.exchange(true)) { + return cpp::fail(DatabaseError::ALREADY_OPEN); + } + + if (!leveldb::sBackgroundThread) { + leveldb::sBackgroundThread = &bg_worker; + } + + return bg_worker + .Dispatch<cpp::result<Database*, DatabaseError>>( + [&]() -> cpp::result<Database*, DatabaseError> { + leveldb::DB* db; + std::unique_ptr<leveldb::Cache> cache{ + leveldb::NewLRUCache(256 * 1024)}; + + leveldb::Options options; + options.env = sEnv.env(); + options.write_buffer_size = 4 * 1024; + options.max_file_size = 
16 * 1024; + options.block_cache = cache.get(); + options.block_size = 2048; + + auto status = leveldb::DB::Open(options, kDbPath, &db); + if (!status.ok()) { + ESP_LOGI(kTag, "opening db failed. recreating."); + db = CreateNewDatabase(options, collator); + if (db == nullptr) { + return cpp::fail(FAILED_TO_OPEN); + } + } + + if (!CheckDatabase(*db, collator)) { + ESP_LOGI(kTag, "db incompatible. recreating."); + delete db; + db = CreateNewDatabase(options, collator); + if (db == nullptr) { + return cpp::fail(FAILED_TO_OPEN); + } + } + + ESP_LOGI(kTag, "Database opened successfully"); + return new Database(db, cache.release(), gatherer, parser, + collator); + }) + .get(); +} + +auto Database::Destroy() -> void { + leveldb::Options options; + options.env = sEnv.env(); + leveldb::DestroyDB(kDbPath, options); +} + +Database::Database(leveldb::DB* db, + leveldb::Cache* cache, + IFileGatherer& file_gatherer, + ITagParser& tag_parser, + locale::ICollator& collator) + : db_(db), + cache_(cache), + file_gatherer_(file_gatherer), + tag_parser_(tag_parser), + collator_(collator), + is_updating_(false) {} + +Database::~Database() { + // Delete db_ first so that any outstanding background work finishes before + // the background task is killed. + delete db_; + delete cache_; + + sIsDbOpen.store(false); +} + +auto Database::schemaVersion() -> std::string { + // If the database is open, then it must have the current schema. 
+ return std::to_string(kCurrentDbVersion); +} + +auto Database::sizeOnDiskBytes() -> size_t { + auto lock = drivers::acquire_spi(); + + FF_DIR dir; + FRESULT res = f_opendir(&dir, kDbPath); + if (res != FR_OK) { + return 0; + } + + size_t total_size = 0; + for (;;) { + FILINFO info; + res = f_readdir(&dir, &info); + if (res != FR_OK || info.fname[0] == 0) { + break; + } + total_size += info.fsize; + } + + return total_size; +} + +auto Database::put(const std::string& key, const std::string& val) -> void { + if (val.empty()) { + db_->Delete(leveldb::WriteOptions{}, kKeyCustom + key); + } else { + db_->Put(leveldb::WriteOptions{}, kKeyCustom + key, val); + } +} + +auto Database::get(const std::string& key) -> std::optional<std::string> { + std::string val; + auto res = db_->Get(leveldb::ReadOptions{}, kKeyCustom + key, &val); + if (!res.ok() || val.empty()) { + return {}; + } + return val; +} + +auto Database::getTrackPath(TrackId id) -> std::optional<std::string> { + auto track_data = dbGetTrackData(id); + if (!track_data) { + return {}; + } + return std::string{track_data->filepath.data(), track_data->filepath.size()}; +} + +auto Database::getTrack(TrackId id) -> std::shared_ptr<Track> { + std::shared_ptr<TrackData> data = dbGetTrackData(id); + if (!data || data->is_tombstoned) { + return {}; + } + std::shared_ptr<TrackTags> tags = tag_parser_.ReadAndParseTags( + {data->filepath.data(), data->filepath.size()}); + if (!tags) { + return {}; + } + return std::make_shared<Track>(data, tags); +} + +auto Database::getIndexes() -> std::vector<IndexInfo> { + // TODO(jacqueline): This probably needs to be async? When we have runtime + // configurable indexes, they will need to come from somewhere. 
+ return { + kAllTracks, + kAllAlbums, + kAlbumsByArtist, + kTracksByGenre, + }; +} + +class UpdateNotifier { + public: + UpdateNotifier(std::atomic<bool>& is_updating) : is_updating_(is_updating) { + events::Ui().Dispatch(event::UpdateStarted{}); + events::System().Dispatch(event::UpdateStarted{}); + } + ~UpdateNotifier() { + is_updating_ = false; + events::Ui().Dispatch(event::UpdateFinished{}); + events::System().Dispatch(event::UpdateFinished{}); + } + + private: + std::atomic<bool>& is_updating_; +}; + +auto Database::updateIndexes() -> void { + if (is_updating_.exchange(true)) { + return; + } + UpdateNotifier notifier{is_updating_}; + + leveldb::ReadOptions read_options; + read_options.fill_cache = true; + + // Stage 1: verify all existing tracks are still valid. + ESP_LOGI(kTag, "verifying existing tracks"); + { + uint64_t num_processed = 0; + std::unique_ptr<leveldb::Iterator> it{db_->NewIterator(read_options)}; + std::string prefix = EncodeDataPrefix(); + for (it->Seek(prefix); it->Valid() && it->key().starts_with(prefix); + it->Next()) { + num_processed++; + events::Ui().Dispatch(event::UpdateProgress{ + .stage = event::UpdateProgress::Stage::kVerifyingExistingTracks, + .val = num_processed, + }); + + std::shared_ptr<TrackData> track = ParseDataValue(it->value()); + if (!track) { + // The value was malformed. Drop this record. 
+ ESP_LOGW(kTag, "dropping malformed metadata"); + db_->Delete(leveldb::WriteOptions(), it->key()); + continue; + } + + if (track->is_tombstoned) { + ESP_LOGW(kTag, "skipping tombstoned %lx", track->id); + continue; + } + + FRESULT res; + FILINFO info; + { + auto lock = drivers::acquire_spi(); + res = f_stat(track->filepath.c_str(), &info); + } + + std::pair<uint16_t, uint16_t> modified_at{0, 0}; + if (res == FR_OK) { + modified_at = {info.fdate, info.ftime}; + } + if (modified_at == track->modified_at) { + continue; + } else { + track->modified_at = modified_at; + } + + std::shared_ptr<TrackTags> tags = tag_parser_.ReadAndParseTags( + {track->filepath.data(), track->filepath.size()}); + if (!tags || tags->encoding() == Container::kUnsupported) { + // We couldn't read the tags for this track. Either they were + // malformed, or perhaps the file is missing. Either way, tombstone + // this record. + ESP_LOGW(kTag, "entombing missing #%lx", track->id); + dbRemoveIndexes(track); + track->is_tombstoned = true; + dbPutTrackData(*track); + db_->Delete(leveldb::WriteOptions{}, EncodePathKey(track->filepath)); + continue; + } + + // At this point, we know that the track still exists in its original + // location. All that's left to do is update any metadata about it. + + uint64_t new_hash = tags->Hash(); + if (new_hash != track->tags_hash) { + // This track's tags have changed. Since the filepath is exactly the + // same, we assume this is a legitimate correction. Update the + // database. + ESP_LOGI(kTag, "updating hash (%llx -> %llx)", track->tags_hash, + new_hash); + dbRemoveIndexes(track); + + track->tags_hash = new_hash; + dbIngestTagHashes(*tags, track->individual_tag_hashes); + dbPutTrackData(*track); + dbPutHash(new_hash, track->id); + } + } + } + + // Stage 2: search for newly added files. 
+ ESP_LOGI(kTag, "scanning for new tracks"); + uint64_t num_processed = 0; + file_gatherer_.FindFiles("", [&](std::string_view path, const FILINFO& info) { + num_processed++; + events::Ui().Dispatch(event::UpdateProgress{ + .stage = event::UpdateProgress::Stage::kScanningForNewTracks, + .val = num_processed, + }); + + std::string unused; + if (db_->Get(read_options, EncodePathKey(path), &unused).ok()) { + // This file is already in the database; skip it. + return; + } + + std::shared_ptr<TrackTags> tags = tag_parser_.ReadAndParseTags(path); + if (!tags || tags->encoding() == Container::kUnsupported) { + // No parseable tags; skip this fiile. + return; + } + + // Check for any existing record with the same hash. + uint64_t hash = tags->Hash(); + std::string key = EncodeHashKey(hash); + std::optional<TrackId> existing_hash; + std::string raw_entry; + if (db_->Get(leveldb::ReadOptions(), key, &raw_entry).ok()) { + existing_hash = ParseHashValue(raw_entry); + } + + std::pair<uint16_t, uint16_t> modified{info.fdate, info.ftime}; + if (!existing_hash) { + // We've never met this track before! Or we have, but the entry is + // malformed. Either way, record this as a new track. + TrackId id = dbMintNewTrackId(); + ESP_LOGI(kTag, "recording new 0x%lx", id); + + auto data = std::make_shared<TrackData>(); + data->id = id; + data->filepath = path; + data->tags_hash = hash; + data->modified_at = modified; + dbIngestTagHashes(*tags, data->individual_tag_hashes); + + dbPutTrackData(*data); + dbPutHash(hash, id); + auto t = std::make_shared<Track>(data, tags); + dbCreateIndexesForTrack(*t); + db_->Put(leveldb::WriteOptions{}, EncodePathKey(path), + TrackIdToBytes(id)); + return; + } + + std::shared_ptr<TrackData> existing_data = dbGetTrackData(*existing_hash); + if (!existing_data) { + // We found a hash that matches, but there's no data record? Weird. 
+ auto new_data = std::make_shared<TrackData>(); + new_data->id = dbMintNewTrackId(); + new_data->filepath = path; + new_data->tags_hash = hash; + new_data->modified_at = modified; + dbIngestTagHashes(*tags, new_data->individual_tag_hashes); + dbPutTrackData(*new_data); + auto t = std::make_shared<Track>(new_data, tags); + dbCreateIndexesForTrack(*t); + db_->Put(leveldb::WriteOptions{}, EncodePathKey(path), + TrackIdToBytes(new_data->id)); + return; + } + + if (existing_data->is_tombstoned) { + ESP_LOGI(kTag, "exhuming track %lu", existing_data->id); + existing_data->is_tombstoned = false; + existing_data->modified_at = modified; + dbPutTrackData(*existing_data); + auto t = std::make_shared<Track>(existing_data, tags); + dbCreateIndexesForTrack(*t); + db_->Put(leveldb::WriteOptions{}, EncodePathKey(path), + TrackIdToBytes(existing_data->id)); + } else if (existing_data->filepath != + std::pmr::string{path.data(), path.size()}) { + ESP_LOGW(kTag, "hash collision: %s, %s, %s", + tags->title().value_or("no title").c_str(), + tags->artist().value_or("no artist").c_str(), + tags->album().value_or("no album").c_str()); + } + }); +} + +auto Database::isUpdating() -> bool { + return is_updating_; +} + +auto Database::dbMintNewTrackId() -> TrackId { + TrackId next_id = 1; + std::string val; + auto status = db_->Get(leveldb::ReadOptions(), kKeyTrackId, &val); + if (status.ok()) { + next_id = BytesToTrackId(val).value_or(next_id); + } else if (!status.IsNotFound()) { + // TODO(jacqueline): Handle this more. 
+ ESP_LOGE(kTag, "failed to get next track id"); + } + + if (!db_->Put(leveldb::WriteOptions(), kKeyTrackId, + TrackIdToBytes(next_id + 1)) + .ok()) { + ESP_LOGE(kTag, "failed to write next track id"); + } + + return next_id; +} + +auto Database::dbEntomb(TrackId id, uint64_t hash) -> void { + std::string key = EncodeHashKey(hash); + std::string val = EncodeHashValue(id); + if (!db_->Put(leveldb::WriteOptions(), key, val).ok()) { + ESP_LOGE(kTag, "failed to entomb #%llx (id #%lx)", hash, id); + } +} + +auto Database::dbPutTrackData(const TrackData& s) -> void { + std::string key = EncodeDataKey(s.id); + std::string val = EncodeDataValue(s); + if (!db_->Put(leveldb::WriteOptions(), key, val).ok()) { + ESP_LOGE(kTag, "failed to write data for #%lx", s.id); + } +} + +auto Database::dbGetTrackData(TrackId id) -> std::shared_ptr<TrackData> { + std::string key = EncodeDataKey(id); + std::string raw_val; + if (!db_->Get(leveldb::ReadOptions(), key, &raw_val).ok()) { + ESP_LOGW(kTag, "no key found for #%lx", id); + return {}; + } + return ParseDataValue(raw_val); +} + +auto Database::dbPutHash(const uint64_t& hash, TrackId i) -> void { + std::string key = EncodeHashKey(hash); + std::string val = EncodeHashValue(i); + if (!db_->Put(leveldb::WriteOptions(), key, val).ok()) { + ESP_LOGE(kTag, "failed to write hash for #%lx", i); + } +} + +auto Database::dbGetHash(const uint64_t& hash) -> std::optional<TrackId> { + std::string key = EncodeHashKey(hash); + std::string raw_val; + if (!db_->Get(leveldb::ReadOptions(), key, &raw_val).ok()) { + ESP_LOGW(kTag, "no key found for hash #%llx", hash); + return {}; + } + return ParseHashValue(raw_val); +} + +auto Database::dbCreateIndexesForTrack(const Track& track) -> void { + for (const IndexInfo& index : getIndexes()) { + leveldb::WriteBatch writes; + auto entries = Index(collator_, index, track); + for (const auto& it : entries) { + writes.Put(EncodeIndexKey(it.first), + {it.second.data(), it.second.size()}); + } + 
db_->Write(leveldb::WriteOptions(), &writes); + } +} + +auto Database::dbRemoveIndexes(std::shared_ptr<TrackData> data) -> void { + auto tags = dbRecoverTagsFromHashes(data->individual_tag_hashes); + if (!tags) { + return; + } + Track track{data, tags}; + for (const IndexInfo& index : getIndexes()) { + auto entries = Index(collator_, index, track); + for (auto it = entries.rbegin(); it != entries.rend(); it++) { + auto key = EncodeIndexKey(it->first); + auto status = db_->Delete(leveldb::WriteOptions{}, key); + if (!status.ok()) { + return; + } + + std::unique_ptr<leveldb::Iterator> cursor{db_->NewIterator({})}; + cursor->Seek(key); + cursor->Prev(); + + auto prev_key = ParseIndexKey(cursor->key()); + if (prev_key && prev_key->header == it->first.header) { + break; + } + + cursor->Next(); + auto next_key = ParseIndexKey(cursor->key()); + if (next_key && next_key->header == it->first.header) { + break; + } + } + } +} + +auto Database::dbIngestTagHashes(const TrackTags& tags, + std::pmr::unordered_map<Tag, uint64_t>& out) + -> void { + leveldb::WriteBatch batch{}; + for (const auto& tag : tags.allPresent()) { + auto val = tags.get(tag); + auto hash = tagHash(val); + batch.Put(EncodeTagHashKey(hash), tagToString(val)); + out[tag] = hash; + } + db_->Write(leveldb::WriteOptions{}, &batch); +} + +auto Database::dbRecoverTagsFromHashes( + const std::pmr::unordered_map<Tag, uint64_t>& hashes) + -> std::shared_ptr<TrackTags> { + auto out = std::make_shared<TrackTags>(); + for (const auto& entry : hashes) { + std::string value; + auto res = db_->Get(leveldb::ReadOptions{}, EncodeTagHashKey(entry.second), + &value); + if (!res.ok()) { + ESP_LOGI(kTag, "failed to retrieve tag!"); + continue; + } + out->set(entry.first, {value.data(), value.size()}); + } + return out; +} + +auto seekToOffset(leveldb::Iterator* it, int offset) { + while (it->Valid() && offset != 0) { + if (offset < 0) { + it->Prev(); + offset++; + } else { + it->Next(); + offset--; + } + } +} + +auto 
Database::getRecord(const SearchKey& c) + -> std::optional<std::pair<std::pmr::string, Record>> { + std::unique_ptr<leveldb::Iterator> it{ + db_->NewIterator(leveldb::ReadOptions{})}; + + it->Seek(c.startKey()); + seekToOffset(it.get(), c.offset); + if (!it->Valid() || !it->key().starts_with(std::string_view{c.prefix})) { + return {}; + } + + std::optional<IndexKey> key = ParseIndexKey(it->key()); + if (!key) { + ESP_LOGW(kTag, "parsing index key failed"); + return {}; + } + + return std::make_pair(std::pmr::string{it->key().data(), it->key().size(), + &memory::kSpiRamResource}, + Record{*key, it->value()}); +} + +auto Database::countRecords(const SearchKey& c) -> size_t { + std::unique_ptr<leveldb::Iterator> it{ + db_->NewIterator(leveldb::ReadOptions{})}; + + it->Seek(c.startKey()); + seekToOffset(it.get(), c.offset); + if (!it->Valid() || !it->key().starts_with(std::string_view{c.prefix})) { + return {}; + } + + size_t count = 0; + while (it->Valid() && it->key().starts_with(std::string_view{c.prefix})) { + it->Next(); + count++; + } + + return count; +} + +auto SearchKey::startKey() const -> std::string_view { + if (key) { + return *key; + } + return prefix; +} + +Record::Record(const IndexKey& key, const leveldb::Slice& t) + : text_(t.data(), t.size(), &memory::kSpiRamResource) { + if (key.track) { + contents_ = *key.track; + } else { + contents_ = ExpandHeader(key.header, key.item); + } +} + +auto Record::text() const -> std::string_view { + return text_; +} + +auto Record::contents() const + -> const std::variant<TrackId, IndexKey::Header>& { + return contents_; +} + +Iterator::Iterator(std::shared_ptr<Database> db, IndexId idx) + : Iterator(db, + IndexKey::Header{ + .id = idx, + .depth = 0, + .components_hash = 0, + }) {} + +Iterator::Iterator(std::shared_ptr<Database> db, const IndexKey::Header& header) + : db_(db), key_{}, current_() { + std::string prefix = EncodeIndexPrefix(header); + key_ = { + .prefix = {prefix.data(), prefix.size(), 
&memory::kSpiRamResource}, + .key = {}, + .offset = 0, + }; + iterate(key_); +} + +auto Iterator::value() const -> const std::optional<Record>& { + return current_; +} + +auto Iterator::next() -> void { + SearchKey new_key = key_; + new_key.offset = 1; + iterate(new_key); +} + +auto Iterator::prev() -> void { + SearchKey new_key = key_; + new_key.offset = -1; + iterate(new_key); +} + +auto Iterator::iterate(const SearchKey& key) -> void { + auto db = db_.lock(); + if (!db) { + ESP_LOGW(kTag, "iterate with dead db"); + return; + } + auto res = db->getRecord(key); + if (res) { + key_ = { + .prefix = key_.prefix, + .key = res->first, + .offset = 0, + }; + current_ = res->second; + } else { + key_ = key; + current_.reset(); + } +} + +auto Iterator::count() const -> size_t { + auto db = db_.lock(); + if (!db) { + ESP_LOGW(kTag, "count with dead db"); + return 0; + } + return db->countRecords(key_); +} + +TrackIterator::TrackIterator(const Iterator& it) : db_(it.db_), levels_() { + levels_.push_back(it); + next(false); +} + +auto TrackIterator::next() -> void { + next(true); +} + +auto TrackIterator::next(bool advance) -> void { + while (!levels_.empty()) { + if (advance) { + levels_.back().next(); + } + + auto& cur = levels_.back().value(); + if (!cur) { + // The current top iterator is out of tracks. Pop it, and move the parent + // to the next item. + levels_.pop_back(); + advance = true; + } else if (std::holds_alternative<IndexKey::Header>(cur->contents())) { + // This record is a branch. Push a new iterator. + auto key = std::get<IndexKey::Header>(cur->contents()); + auto db = db_.lock(); + if (!db) { + return; + } + levels_.emplace_back(db, key); + // Don't skip the first value of the new level. + advance = false; + } else if (std::holds_alternative<TrackId>(cur->contents())) { + // New record is a leaf. 
+ break; + } + } +} + +auto TrackIterator::value() const -> std::optional<TrackId> { + if (levels_.empty()) { + return {}; + } + auto cur = levels_.back().value(); + if (!cur) { + return {}; + } + if (std::holds_alternative<TrackId>(cur->contents())) { + return std::get<TrackId>(cur->contents()); + } + return {}; +} + +auto TrackIterator::count() const -> size_t { + size_t size = 0; + TrackIterator copy{*this}; + while (!copy.levels_.empty()) { + size += copy.levels_.back().count(); + copy.levels_.pop_back(); + copy.next(); + } + return size; +} + +} // namespace database diff --git a/src/tangara/database/database.hpp b/src/tangara/database/database.hpp new file mode 100644 index 00000000..35b76a13 --- /dev/null +++ b/src/tangara/database/database.hpp @@ -0,0 +1,244 @@ +/* + * Copyright 2023 jacqueline <me@jacqueline.id.au> + * + * SPDX-License-Identifier: GPL-3.0-only + */ + +#pragma once + +#include <stdint.h> +#include <sys/_stdint.h> +#include <cstdint> +#include <future> +#include <memory> +#include <optional> +#include <stack> +#include <string> +#include <utility> +#include <vector> + +#include "collation.hpp" +#include "cppbor.h" +#include "file_gatherer.hpp" +#include "index.hpp" +#include "leveldb/cache.h" +#include "leveldb/db.h" +#include "leveldb/iterator.h" +#include "leveldb/options.h" +#include "leveldb/slice.h" +#include "memory_resource.hpp" +#include "records.hpp" +#include "result.hpp" +#include "tag_parser.hpp" +#include "tasks.hpp" +#include "track.hpp" + +namespace database { + +const uint8_t kCurrentDbVersion = 6; + +struct SearchKey; +class Record; +class Iterator; + +/* + * Handle to an open database. This can be used to store large amounts of + * persistent data on the SD card, in a manner that can be retrieved later very + * quickly. + * + * A database includes a number of 'indexes'. Each index is a sorted, + * hierarchical view of all the playable tracks on the device. 
+ */ +class Database { + public: + enum DatabaseError { + ALREADY_OPEN, + FAILED_TO_OPEN, + }; + static auto Open(IFileGatherer& file_gatherer, + ITagParser& tag_parser, + locale::ICollator& collator, + tasks::WorkerPool& bg_worker) + -> cpp::result<Database*, DatabaseError>; + + static auto Destroy() -> void; + + ~Database(); + + auto schemaVersion() -> std::string; + + auto sizeOnDiskBytes() -> size_t; + + /* Adds an arbitrary record to the database. */ + auto put(const std::string& key, const std::string& val) -> void; + + /* Retrives a value previously stored with `put`. */ + auto get(const std::string& key) -> std::optional<std::string>; + + auto getTrackPath(TrackId id) -> std::optional<std::string>; + auto getTrack(TrackId id) -> std::shared_ptr<Track>; + + auto getIndexes() -> std::vector<IndexInfo>; + auto updateIndexes() -> void; + auto isUpdating() -> bool; + + // Cannot be copied or moved. + Database(const Database&) = delete; + Database& operator=(const Database&) = delete; + + private: + friend class Iterator; + + // Owned. Dumb pointers because destruction needs to be done in an explicit + // order. + leveldb::DB* db_; + leveldb::Cache* cache_; + + // Not owned. 
+ IFileGatherer& file_gatherer_; + ITagParser& tag_parser_; + locale::ICollator& collator_; + + std::atomic<bool> is_updating_; + + Database(leveldb::DB* db, + leveldb::Cache* cache, + IFileGatherer& file_gatherer, + ITagParser& tag_parser, + locale::ICollator& collator); + + auto dbMintNewTrackId() -> TrackId; + + auto dbEntomb(TrackId track, uint64_t hash) -> void; + auto dbPutTrackData(const TrackData& s) -> void; + auto dbGetTrackData(TrackId id) -> std::shared_ptr<TrackData>; + auto dbPutHash(const uint64_t& hash, TrackId i) -> void; + auto dbGetHash(const uint64_t& hash) -> std::optional<TrackId>; + + auto dbCreateIndexesForTrack(const Track& track) -> void; + auto dbRemoveIndexes(std::shared_ptr<TrackData>) -> void; + + auto dbIngestTagHashes(const TrackTags&, + std::pmr::unordered_map<Tag, uint64_t>&) -> void; + auto dbRecoverTagsFromHashes(const std::pmr::unordered_map<Tag, uint64_t>&) + -> std::shared_ptr<TrackTags>; + + auto getRecord(const SearchKey& c) + -> std::optional<std::pair<std::pmr::string, Record>>; + auto countRecords(const SearchKey& c) -> size_t; +}; + +/* + * Container for the data needed to iterate through database records. This is a + * lower-level type that the higher-level iterators are built from; most users + * outside this namespace shouldn't need to work with continuations. + */ +struct SearchKey { + std::pmr::string prefix; + /* If not given, then iteration starts from `prefix`. */ + std::optional<std::pmr::string> key; + int offset; + + auto startKey() const -> std::string_view; +}; + +/* + * A record belonging to one of the database's indexes. This may either be a + * leaf record, containing a track id, or a branch record, containing a new + * Header to retrieve results at the next level of the index. 
+ */ +class Record { + public: + Record(const IndexKey&, const leveldb::Slice&); + + Record(const Record&) = default; + Record& operator=(const Record& other) = default; + + auto text() const -> std::string_view; + auto contents() const -> const std::variant<TrackId, IndexKey::Header>&; + + private: + std::pmr::string text_; + std::variant<TrackId, IndexKey::Header> contents_; +}; + +/* + * Utility for accessing a large set of database records, one record at a time. + */ +class Iterator { + public: + Iterator(std::shared_ptr<Database>, IndexId); + Iterator(std::shared_ptr<Database>, const IndexKey::Header&); + + Iterator(const Iterator&) = default; + Iterator& operator=(const Iterator& other) = default; + + auto value() const -> const std::optional<Record>&; + std::optional<Record> operator*() const { return value(); } + + auto next() -> void; + std::optional<Record> operator++() { + next(); + return value(); + } + std::optional<Record> operator++(int) { + auto val = value(); + next(); + return val; + } + + auto prev() -> void; + std::optional<Record> operator--() { + prev(); + return value(); + } + std::optional<Record> operator--(int) { + auto val = value(); + prev(); + return val; + } + + auto count() const -> size_t; + + private: + auto iterate(const SearchKey& key) -> void; + + friend class TrackIterator; + + std::weak_ptr<Database> db_; + SearchKey key_; + std::optional<Record> current_; +}; + +class TrackIterator { + public: + TrackIterator(const Iterator&); + + TrackIterator(const TrackIterator&) = default; + TrackIterator& operator=(TrackIterator&& other) = default; + + auto value() const -> std::optional<TrackId>; + std::optional<TrackId> operator*() const { return value(); } + + auto next() -> void; + std::optional<TrackId> operator++() { + next(); + return value(); + } + std::optional<TrackId> operator++(int) { + auto val = value(); + next(); + return val; + } + + auto count() const -> size_t; + + private: + TrackIterator(std::weak_ptr<Database>); + 
auto next(bool advance) -> void; + + std::weak_ptr<Database> db_; + std::vector<Iterator> levels_; +}; + +} // namespace database diff --git a/src/tangara/database/db_events.hpp b/src/tangara/database/db_events.hpp new file mode 100644 index 00000000..a1aefc27 --- /dev/null +++ b/src/tangara/database/db_events.hpp @@ -0,0 +1,29 @@ +/* + * Copyright 2023 jacqueline <me@jacqueline.id.au> + * + * SPDX-License-Identifier: GPL-3.0-only + */ + +#pragma once + +#include <stdint.h> +#include "tinyfsm.hpp" + +namespace database { +namespace event { + +struct UpdateStarted : tinyfsm::Event {}; + +struct UpdateFinished : tinyfsm::Event {}; + +struct UpdateProgress : tinyfsm::Event { + enum class Stage { + kVerifyingExistingTracks, + kScanningForNewTracks, + }; + Stage stage; + uint64_t val; +}; + +} // namespace event +} // namespace database diff --git a/src/tangara/database/env_esp.cpp b/src/tangara/database/env_esp.cpp new file mode 100644 index 00000000..f7a5637a --- /dev/null +++ b/src/tangara/database/env_esp.cpp @@ -0,0 +1,497 @@ +/* + * Copyright 2023 jacqueline <me@jacqueline.id.au> + * + * SPDX-License-Identifier: GPL-3.0-only + */ + +#include "env_esp.hpp" + +#include <atomic> +#include <cerrno> +#include <cstddef> +#include <cstdint> +#include <cstdio> +#include <cstdlib> +#include <cstring> +#include <functional> +#include <limits> +#include <memory> +#include <mutex> +#include <queue> +#include <set> +#include <string> +#include <thread> +#include <type_traits> +#include <utility> + +#include "esp_heap_caps.h" +#include "esp_log.h" +#include "ff.h" +#include "freertos/FreeRTOS.h" +#include "freertos/portmacro.h" +#include "freertos/projdefs.h" +#include "freertos/queue.h" +#include "freertos/task.h" +#include "leveldb/env.h" +#include "leveldb/slice.h" +#include "leveldb/status.h" + +#include "spi.hpp" +#include "tasks.hpp" + +namespace leveldb { + +tasks::WorkerPool *sBackgroundThread = nullptr; + +std::string ErrToStr(FRESULT err) { + switch (err) { + case 
FR_OK: + return "FR_OK"; + case FR_DISK_ERR: + return "FR_DISK_ERR"; + case FR_INT_ERR: + return "FR_INT_ERR"; + case FR_NOT_READY: + return "FR_NOT_READY"; + case FR_NO_FILE: + return "FR_NO_FILE"; + case FR_NO_PATH: + return "FR_NO_PATH"; + case FR_INVALID_NAME: + return "FR_INVALID_NAME"; + case FR_DENIED: + return "FR_DENIED"; + case FR_EXIST: + return "FR_EXIST"; + case FR_INVALID_OBJECT: + return "FR_INVALID_OBJECT"; + case FR_WRITE_PROTECTED: + return "FR_WRITE_PROTECTED"; + case FR_INVALID_DRIVE: + return "FR_INVALID_DRIVE"; + case FR_NOT_ENABLED: + return "FR_NOT_ENABLED"; + case FR_NO_FILESYSTEM: + return "FR_NO_FILESYSTEM"; + case FR_MKFS_ABORTED: + return "FR_MKFS_ABORTED"; + case FR_TIMEOUT: + return "FR_TIMEOUT"; + case FR_LOCKED: + return "FR_LOCKED"; + case FR_NOT_ENOUGH_CORE: + return "FR_NOT_ENOUGH_CORE"; + case FR_TOO_MANY_OPEN_FILES: + return "FR_TOO_MANY_OPEN_FILES"; + case FR_INVALID_PARAMETER: + return "FR_INVALID_PARAMETER"; + default: + return "UNKNOWN"; + } +} + +Status EspError(const std::string& context, FRESULT err) { + if (err == FR_NO_FILE) { + return Status::NotFound(context, ErrToStr(err)); + } else { + return Status::IOError(context, ErrToStr(err)); + } +} + +class EspSequentialFile final : public SequentialFile { + public: + EspSequentialFile(const std::string& filename, FIL file) + : file_(file), filename_(filename) {} + ~EspSequentialFile() override { + auto lock = drivers::acquire_spi(); + f_close(&file_); + } + + Status Read(size_t n, Slice* result, char* scratch) override { + auto lock = drivers::acquire_spi(); + UINT read_size = 0; + FRESULT res = f_read(&file_, scratch, n, &read_size); + if (res != FR_OK) { // Read error. 
+ return EspError(filename_, res); + } + *result = Slice(scratch, read_size); + return Status::OK(); + } + + Status Skip(uint64_t n) override { + auto lock = drivers::acquire_spi(); + DWORD current_pos = f_tell(&file_); + FRESULT res = f_lseek(&file_, current_pos + n); + if (res != FR_OK) { + return EspError(filename_, res); + } + return Status::OK(); + } + + private: + FIL file_; + const std::string filename_; +}; + +// Implements random read access in a file using pread(). +// +// Instances of this class are thread-safe, as required by the RandomAccessFile +// API. Instances are immutable and Read() only calls thread-safe library +// functions. +class EspRandomAccessFile final : public RandomAccessFile { + public: + // The new instance takes ownership of |fd|. |fd_limiter| must outlive this + // instance, and will be used to determine if . + explicit EspRandomAccessFile(const std::string& filename) + : filename_(std::move(filename)) {} + + ~EspRandomAccessFile() override {} + + Status Read(uint64_t offset, + size_t n, + Slice* result, + char* scratch) const override { + auto lock = drivers::acquire_spi(); + FIL file; + FRESULT res = f_open(&file, filename_.c_str(), FA_READ); + if (res != FR_OK) { + return EspError(filename_, res); + } + + res = f_lseek(&file, offset); + if (res != FR_OK) { + return EspError(filename_, res); + } + + Status status; + UINT read_size = 0; + res = f_read(&file, scratch, n, &read_size); + if (res != FR_OK || read_size == 0) { + return EspError(filename_, res); + } + *result = Slice(scratch, read_size); + + f_close(&file); + + return status; + } + + private: + const std::string filename_; +}; + +// TODO(jacqueline): LevelDB expects writes to this class to be buffered in +// memory. FatFs already does in-memory buffering, but we should think about +// whether to layer more on top. 
+class EspWritableFile final : public WritableFile { + public: + EspWritableFile(std::string filename, FIL file) + : filename_(std::move(filename)), file_(file), is_open_(true) {} + + ~EspWritableFile() override { + if (is_open_) { + // Ignoring any potential errors + Close(); + } + } + + Status Append(const Slice& data) override { + if (!is_open_) { + return EspError(filename_, FR_NOT_ENABLED); + } + + auto lock = drivers::acquire_spi(); + size_t write_size = data.size(); + const char* write_data = data.data(); + + UINT bytes_written = 0; + FRESULT res = f_write(&file_, write_data, write_size, &bytes_written); + if (res != FR_OK) { + return EspError(filename_, res); + } + + return Status::OK(); + } + + Status Close() override { + auto lock = drivers::acquire_spi(); + is_open_ = false; + FRESULT res = f_close(&file_); + if (res != FR_OK) { + return EspError(filename_, res); + } + return Status::OK(); + } + + Status Flush() override { return Sync(); } + + Status Sync() override { + if (!is_open_) { + return EspError(filename_, FR_NOT_ENABLED); + } + auto lock = drivers::acquire_spi(); + FRESULT res = f_sync(&file_); + if (res != FR_OK) { + return EspError(filename_, res); + } + return Status::OK(); + } + + private: + const std::string filename_; + FIL file_; + bool is_open_; +}; + +class EspFileLock : public FileLock { + public: + explicit EspFileLock(const std::string& filename) : filename_(filename) {} + const std::string& filename() { return filename_; } + + private: + const std::string filename_; +}; + +class EspLogger final : public Logger { + public: + explicit EspLogger(FIL file) : file_(file) {} + ~EspLogger() override { f_close(&file_); } + + void Logv(const char* format, std::va_list ap) override { + /* + std::va_list args_copy; + va_copy(args_copy, ap); +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wformat-nonliteral" + std::size_t bytes_needed = snprintf(NULL, 0, format, args_copy); + char* output = reinterpret_cast<char*>( + 
heap_caps_calloc(bytes_needed, 1, MALLOC_CAP_SPIRAM)); + snprintf(output, bytes_needed, format, args_copy); +#pragma GCC diagnostic pop + va_end(args_copy); + ESP_LOGI("LEVELDB", "%s", output); + // f_puts(output, &file_); + free(reinterpret_cast<void*>(output)); + */ + } + + private: + FIL file_; +}; + +EspEnv::~EspEnv() { + ESP_LOGE("LEVELDB", "EspEnv singleton destroyed. Unsupported behavior!"); +} + +Status EspEnv::NewSequentialFile(const std::string& filename, + SequentialFile** result) { + auto lock = drivers::acquire_spi(); + FIL file; + FRESULT res = f_open(&file, filename.c_str(), FA_READ); + if (res != FR_OK) { + *result = nullptr; + return EspError(filename, res); + } + + *result = new EspSequentialFile(filename, file); + return Status::OK(); +} + +Status EspEnv::NewRandomAccessFile(const std::string& filename, + RandomAccessFile** result) { + auto lock = drivers::acquire_spi(); + // EspRandomAccessFile doesn't try to open the file until it's needed, so + // we need to first ensure the file exists to handle the NotFound case + // correctly. 
+ FILINFO info; + FRESULT res = f_stat(filename.c_str(), &info); + if (res != FR_OK) { + *result = nullptr; + return EspError(filename, res); + } + + *result = new EspRandomAccessFile(filename); + return Status::OK(); +} + +Status EspEnv::NewWritableFile(const std::string& filename, + WritableFile** result) { + auto lock = drivers::acquire_spi(); + FIL file; + FRESULT res = f_open(&file, filename.c_str(), FA_WRITE | FA_CREATE_ALWAYS); + if (res != FR_OK) { + *result = nullptr; + return EspError(filename, res); + } + + *result = new EspWritableFile(filename, file); + return Status::OK(); +} + +Status EspEnv::NewAppendableFile(const std::string& filename, + WritableFile** result) { + auto lock = drivers::acquire_spi(); + FIL file; + FRESULT res = f_open(&file, filename.c_str(), FA_WRITE | FA_OPEN_APPEND); + if (res != FR_OK) { + *result = nullptr; + return EspError(filename, res); + } + + *result = new EspWritableFile(filename, file); + return Status::OK(); +} + +bool EspEnv::FileExists(const std::string& filename) { + auto lock = drivers::acquire_spi(); + FILINFO info; + return f_stat(filename.c_str(), &info) == FR_OK; +} + +Status EspEnv::GetChildren(const std::string& directory_path, + std::vector<std::string>* result) { + result->clear(); + + auto lock = drivers::acquire_spi(); + FF_DIR dir; + FRESULT res = f_opendir(&dir, directory_path.c_str()); + if (res != FR_OK) { + return EspError(directory_path, res); + } + + FILINFO info; + for (;;) { + res = f_readdir(&dir, &info); + if (res != FR_OK) { + return EspError(directory_path, res); + } + if (info.fname[0] == 0) { + break; + } + result->emplace_back(info.fname); + } + + res = f_closedir(&dir); + if (res != FR_OK) { + return EspError(directory_path, res); + } + + return Status::OK(); +} + +Status EspEnv::RemoveFile(const std::string& filename) { + auto lock = drivers::acquire_spi(); + FRESULT res = f_unlink(filename.c_str()); + if (res != FR_OK) { + return EspError(filename, res); + } + return Status::OK(); +} + 
+Status EspEnv::CreateDir(const std::string& dirname) { + auto lock = drivers::acquire_spi(); + FRESULT res = f_mkdir(dirname.c_str()); + if (res != FR_OK) { + return EspError(dirname, res); + } + return Status::OK(); +} + +Status EspEnv::RemoveDir(const std::string& dirname) { + return RemoveFile(dirname); +} + +Status EspEnv::GetFileSize(const std::string& filename, uint64_t* size) { + auto lock = drivers::acquire_spi(); + FILINFO info; + FRESULT res = f_stat(filename.c_str(), &info); + if (res != FR_OK) { + *size = 0; + return EspError(filename, res); + } + *size = info.fsize; + return Status::OK(); +} + +Status EspEnv::RenameFile(const std::string& from, const std::string& to) { + // Match the POSIX behaviour of replacing any existing file. + if (FileExists(to)) { + Status s = RemoveFile(to); + if (!s.ok()) { + return s; + } + } + auto lock = drivers::acquire_spi(); + FRESULT res = f_rename(from.c_str(), to.c_str()); + if (res != FR_OK) { + return EspError(from, res); + } + return Status::OK(); +} + +Status EspEnv::LockFile(const std::string& filename, FileLock** lock) { + *lock = nullptr; + + if (!locks_.Insert(filename)) { + return Status::IOError("lock " + filename, "already held by process"); + } + + *lock = new EspFileLock(filename); + return Status::OK(); +} + +Status EspEnv::UnlockFile(FileLock* lock) { + EspFileLock* posix_file_lock = static_cast<EspFileLock*>(lock); + locks_.Remove(posix_file_lock->filename()); + delete posix_file_lock; + return Status::OK(); +} + +void EspEnv::StartThread(void (*thread_main)(void* thread_main_arg), + void* thread_main_arg) { + std::thread new_thread(thread_main, thread_main_arg); + new_thread.detach(); +} + +Status EspEnv::GetTestDirectory(std::string* result) { + CreateDir("/tmp"); + *result = "/tmp"; + return Status::OK(); +} + +Status EspEnv::NewLogger(const std::string& filename, Logger** result) { + auto lock = drivers::acquire_spi(); + FIL file; + FRESULT res = f_open(&file, filename.c_str(), FA_WRITE | 
FA_OPEN_APPEND); + if (res != FR_OK) { + *result = nullptr; + return EspError(filename, res); + } + + *result = new EspLogger(file); + return Status::OK(); +} + +uint64_t EspEnv::NowMicros() { + struct timeval tv_now; + gettimeofday(&tv_now, NULL); + return (int64_t)tv_now.tv_sec * 1000000L + (int64_t)tv_now.tv_usec; +} + +void EspEnv::SleepForMicroseconds(int micros) { + vTaskDelay(pdMS_TO_TICKS(micros / 1000)); +} + +EspEnv::EspEnv() {} + +void EspEnv::Schedule( + void (*background_work_function)(void* background_work_arg), + void* background_work_arg) { + auto worker = sBackgroundThread; + if (worker) { + worker->Dispatch<void>( + [=]() { std::invoke(background_work_function, background_work_arg); }); + } +} + +} // namespace leveldb diff --git a/src/tangara/database/env_esp.hpp b/src/tangara/database/env_esp.hpp new file mode 100644 index 00000000..472a72a6 --- /dev/null +++ b/src/tangara/database/env_esp.hpp @@ -0,0 +1,143 @@ +/* + * Copyright 2023 jacqueline <me@jacqueline.id.au> + * + * SPDX-License-Identifier: GPL-3.0-only + */ + +#pragma once + +#include <memory> +#include <mutex> +#include <set> +#include <string> + +#include "leveldb/env.h" +#include "leveldb/status.h" + +#include "tasks.hpp" + +namespace leveldb { + +extern tasks::WorkerPool* sBackgroundThread; + +// Tracks the files locked by EspEnv::LockFile(). +// +// We maintain a separate set instead of relying on fcntl(F_SETLK) because +// fcntl(F_SETLK) does not provide any protection against multiple uses from the +// same process. +// +// Instances are thread-safe because all member data is guarded by a mutex. 
+class InMemoryLockTable { + public: + bool Insert(const std::string& fname) { + mu_.lock(); + bool succeeded = locked_files_.insert(fname).second; + mu_.unlock(); + return succeeded; + } + void Remove(const std::string& fname) { + mu_.lock(); + locked_files_.erase(fname); + mu_.unlock(); + } + + private: + std::mutex mu_; + std::set<std::string> locked_files_; +}; + +class EspEnv : public leveldb::Env { + public: + EspEnv(); + ~EspEnv() override; + + Status NewSequentialFile(const std::string& filename, + SequentialFile** result) override; + + Status NewRandomAccessFile(const std::string& filename, + RandomAccessFile** result) override; + + Status NewWritableFile(const std::string& filename, + WritableFile** result) override; + + Status NewAppendableFile(const std::string& filename, + WritableFile** result) override; + + bool FileExists(const std::string& filename) override; + + Status GetChildren(const std::string& directory_path, + std::vector<std::string>* result) override; + + Status RemoveFile(const std::string& filename) override; + + Status CreateDir(const std::string& dirname) override; + + Status RemoveDir(const std::string& dirname) override; + + Status GetFileSize(const std::string& filename, uint64_t* size) override; + + Status RenameFile(const std::string& from, const std::string& to) override; + + Status LockFile(const std::string& filename, FileLock** lock) override; + + Status UnlockFile(FileLock* lock) override; + + void Schedule(void (*background_work_function)(void* background_work_arg), + void* background_work_arg) override; + + void StartThread(void (*thread_main)(void* thread_main_arg), + void* thread_main_arg) override; + + Status GetTestDirectory(std::string* result) override; + + Status NewLogger(const std::string& filename, Logger** result) override; + + uint64_t NowMicros() override; + + void SleepForMicroseconds(int micros) override; + + void BackgroundThreadMain(); + + private: + InMemoryLockTable locks_; // Thread-safe. 
+}; + +} // namespace leveldb + +namespace database { + +// Wraps an Env instance whose destructor is never created. +// +// Intended usage: +// using PlatformSingletonEnv = SingletonEnv<PlatformEnv>; +// void ConfigurePosixEnv(int param) { +// PlatformSingletonEnv::AssertEnvNotInitialized(); +// // set global configuration flags. +// } +// Env* Env::Default() { +// static PlatformSingletonEnv default_env; +// return default_env.env(); +// } +template <typename EnvType> +class SingletonEnv { + public: + SingletonEnv() { + static_assert(sizeof(env_storage_) >= sizeof(EnvType), + "env_storage_ will not fit the Env"); + static_assert(alignof(decltype(env_storage_)) >= alignof(EnvType), + "env_storage_ does not meet the Env's alignment needs"); + new (&env_storage_) EnvType(); + } + ~SingletonEnv() = default; + + SingletonEnv(const SingletonEnv&) = delete; + SingletonEnv& operator=(const SingletonEnv&) = delete; + + leveldb::Env* env() { return reinterpret_cast<leveldb::Env*>(&env_storage_); } + + private: + typename std::aligned_storage<sizeof(EnvType), alignof(EnvType)>::type + env_storage_; +}; + +} // namespace database diff --git a/src/tangara/database/file_gatherer.cpp b/src/tangara/database/file_gatherer.cpp new file mode 100644 index 00000000..b7b7271e --- /dev/null +++ b/src/tangara/database/file_gatherer.cpp @@ -0,0 +1,80 @@ +/* + * Copyright 2023 jacqueline <me@jacqueline.id.au> + * + * SPDX-License-Identifier: GPL-3.0-only + */ + +#include "file_gatherer.hpp" + +#include <deque> +#include <functional> +#include <sstream> +#include <string> + +#include "ff.h" + +#include "memory_resource.hpp" +#include "spi.hpp" + +namespace database { + +static_assert(sizeof(TCHAR) == sizeof(char), "TCHAR must be CHAR"); + +auto FileGathererImpl::FindFiles( + const std::string& root, + std::function<void(std::string_view, const FILINFO&)> cb) -> void { + std::pmr::deque<std::pmr::string> to_explore{&memory::kSpiRamResource}; + to_explore.push_back({root.data(), 
root.size()}); + + while (!to_explore.empty()) { + auto next_path_str = to_explore.front(); + to_explore.pop_front(); + + const TCHAR* next_path = static_cast<const TCHAR*>(next_path_str.c_str()); + + FF_DIR dir; + FRESULT res; + { + auto lock = drivers::acquire_spi(); + res = f_opendir(&dir, next_path); + } + if (res != FR_OK) { + // TODO: log. + continue; + } + + for (;;) { + FILINFO info; + { + auto lock = drivers::acquire_spi(); + res = f_readdir(&dir, &info); + } + if (res != FR_OK || info.fname[0] == 0) { + // No more files in the directory. + break; + } else if (info.fattrib & (AM_HID | AM_SYS) || info.fname[0] == '.') { + // System or hidden file. Ignore it and move on. + continue; + } else { + std::pmr::string full_path{&memory::kSpiRamResource}; + full_path += next_path_str; + full_path += "/"; + full_path += info.fname; + + if (info.fattrib & AM_DIR) { + // This is a directory. Add it to the explore queue. + to_explore.push_back(full_path); + } else { + // This is a file! Let the callback know about it. 
+ // std::invoke(cb, full_path.str(), info); + std::invoke(cb, full_path, info); + } + } + } + + auto lock = drivers::acquire_spi(); + f_closedir(&dir); + } +} + +} // namespace database diff --git a/src/tangara/database/file_gatherer.hpp b/src/tangara/database/file_gatherer.hpp new file mode 100644 index 00000000..685bdb2c --- /dev/null +++ b/src/tangara/database/file_gatherer.hpp @@ -0,0 +1,36 @@ +/* + * Copyright 2023 jacqueline <me@jacqueline.id.au> + * + * SPDX-License-Identifier: GPL-3.0-only + */ + +#pragma once + +#include <deque> +#include <functional> +#include <sstream> +#include <string> + +#include "ff.h" + +namespace database { + +class IFileGatherer { + public: + virtual ~IFileGatherer(){}; + + virtual auto FindFiles( + const std::string& root, + std::function<void(std::string_view, const FILINFO&)> cb) + -> void = 0; +}; + +class FileGathererImpl : public IFileGatherer { + public: + virtual auto FindFiles( + const std::string& root, + std::function<void(std::string_view, const FILINFO&)> cb) + -> void override; +}; + +} // namespace database diff --git a/src/tangara/database/future_fetcher.hpp b/src/tangara/database/future_fetcher.hpp new file mode 100644 index 00000000..e8ce9729 --- /dev/null +++ b/src/tangara/database/future_fetcher.hpp @@ -0,0 +1,62 @@ +/* + * Copyright 2023 jacqueline <me@jacqueline.id.au> + * + * SPDX-License-Identifier: GPL-3.0-only + */ + +#pragma once + +#include <memory> +#include <utility> + +#include "database.hpp" + +namespace database { + +/* + * Utility to simplify waiting for a std::future to complete without blocking. + * Each instance is good for a single future, and does not directly own anything + * other than the future itself. + */ +template <typename T> +class FutureFetcher { + public: + explicit FutureFetcher(std::future<T>&& fut) + : is_consumed_(false), fut_(std::move(fut)) {} + + /* + * Returns whether or not the underlying future is still awaiting async work. 
+ */ + auto Finished() -> bool { + if (!fut_.valid()) { + return true; + } + if (fut_.wait_for(std::chrono::seconds(0)) != std::future_status::ready) { + return false; + } + return true; + } + + /* + * Returns the result of the future, and releases ownership of the underling + * resource. Will return an absent value if the future became invalid (e.g. + * the promise associated with it was destroyed.) + */ + auto Result() -> std::optional<T> { + assert(!is_consumed_); + if (is_consumed_) { + return {}; + } + is_consumed_ = true; + if (!fut_.valid()) { + return {}; + } + return fut_.get(); + } + + private: + bool is_consumed_; + std::future<T> fut_; +}; + +} // namespace database diff --git a/src/tangara/database/index.cpp b/src/tangara/database/index.cpp new file mode 100644 index 00000000..328c3b43 --- /dev/null +++ b/src/tangara/database/index.cpp @@ -0,0 +1,206 @@ +/* + * Copyright 2023 jacqueline <me@jacqueline.id.au> + * + * SPDX-License-Identifier: GPL-3.0-only + */ + +#include "index.hpp" +#include <sys/_stdint.h> + +#include <cstdint> +#include <iomanip> +#include <iostream> +#include <sstream> +#include <string> +#include <variant> +#include <vector> + +#include "collation.hpp" +#include "cppbor.h" +#include "esp_log.h" +#include "komihash.h" +#include "leveldb/write_batch.h" + +#include "records.hpp" +#include "track.hpp" + +namespace database { + +[[maybe_unused]] static const char* kTag = "index"; + +const IndexInfo kAlbumsByArtist{ + .id = 1, + .name = "Albums by Artist", + .components = {Tag::kAlbumArtist, Tag::kAlbum, Tag::kAlbumOrder}, +}; + +const IndexInfo kTracksByGenre{ + .id = 2, + .name = "Tracks by Genre", + .components = {Tag::kGenres, Tag::kTitle}, +}; + +const IndexInfo kAllTracks{ + .id = 3, + .name = "All Tracks", + .components = {Tag::kTitle}, +}; + +const IndexInfo kAllAlbums{ + .id = 4, + .name = "All Albums", + .components = {Tag::kAlbum, Tag::kAlbumOrder}, +}; + +class Indexer { + public: + Indexer(locale::ICollator& collator, const 
Track& t, const IndexInfo& idx) + : collator_(collator), track_(t), index_(idx) {} + + auto index() -> std::vector<std::pair<IndexKey, std::string>>; + + private: + auto handleLevel(const IndexKey::Header& header, + std::span<const Tag> components) -> void; + + auto handleItem(const IndexKey::Header& header, + std::variant<std::pmr::string, uint32_t> item, + std::span<const Tag> components) -> void; + + auto missing_value(Tag tag) -> TagValue { + switch (tag) { + case Tag::kTitle: + return track_.TitleOrFilename(); + case Tag::kArtist: + return "Unknown Artist"; + case Tag::kAlbum: + return "Unknown Album"; + case Tag::kAlbumArtist: + return track_.tags().artist().value_or("Unknown Artist"); + return "Unknown Album"; + case Tag::kGenres: + return std::pmr::vector<std::pmr::string>{}; + case Tag::kDisc: + return 0u; + case Tag::kTrack: + return 0u; + case Tag::kAlbumOrder: + return 0u; + } + return std::monostate{}; + } + + locale::ICollator& collator_; + const Track& track_; + const IndexInfo index_; + + std::vector<std::pair<IndexKey, std::string>> out_; +}; + +auto Indexer::index() -> std::vector<std::pair<IndexKey, std::string>> { + out_.clear(); + + IndexKey::Header root_header{ + .id = index_.id, + .depth = 0, + .components_hash = 0, + }; + handleLevel(root_header, index_.components); + + return out_; +} + +auto Indexer::handleLevel(const IndexKey::Header& header, + std::span<const Tag> components) -> void { + Tag component = components.front(); + TagValue value = track_.tags().get(component); + if (std::holds_alternative<std::monostate>(value)) { + value = missing_value(component); + } + + std::visit( + [&](auto&& arg) { + using T = std::decay_t<decltype(arg)>; + if constexpr (std::is_same_v<T, std::monostate>) { + ESP_LOGW(kTag, "dropping component without value: %s", + tagName(components.front()).c_str()); + } else if constexpr (std::is_same_v<T, std::pmr::string>) { + handleItem(header, arg, components); + } else if constexpr (std::is_same_v<T, uint32_t>) 
{ + handleItem(header, arg, components); + } else if constexpr (std::is_same_v< + T, std::span<const std::pmr::string>>) { + for (const auto& i : arg) { + handleItem(header, i, components); + } + } + }, + value); +} + +auto Indexer::handleItem(const IndexKey::Header& header, + std::variant<std::pmr::string, uint32_t> item, + std::span<const Tag> components) -> void { + IndexKey key{ + .header = header, + .item = {}, + .track = {}, + }; + std::string value; + + std::string item_text; + std::visit( + [&](auto&& arg) { + using T = std::decay_t<decltype(arg)>; + if constexpr (std::is_same_v<T, std::pmr::string>) { + value = {arg.data(), arg.size()}; + auto xfrm = collator_.Transform(value); + key.item = {xfrm.data(), xfrm.size()}; + } else if constexpr (std::is_same_v<T, uint32_t>) { + value = std::to_string(arg); + // FIXME: this sucks lol. we should just write the number directly, + // LSB-first, but then we need to be able to parse it back properly. + std::ostringstream str; + str << std::setw(8) << std::setfill('0') << arg; + std::string encoded = str.str(); + key.item = {encoded.data(), encoded.size()}; + } + }, + item); + + std::optional<IndexKey::Header> next_level; + if (components.size() == 1) { + value = track_.TitleOrFilename(); + key.track = track_.data().id; + } else { + next_level = ExpandHeader(key.header, key.item); + } + + out_.emplace_back(key, value); + + if (next_level) { + handleLevel(*next_level, components.subspan(1)); + } +} + +auto Index(locale::ICollator& c, const IndexInfo& i, const Track& t) + -> std::vector<std::pair<IndexKey, std::string>> { + Indexer indexer{c, t, i}; + return indexer.index(); +} + +auto ExpandHeader(const IndexKey::Header& header, + const std::optional<std::pmr::string>& component) + -> IndexKey::Header { + IndexKey::Header ret{header}; + ret.depth++; + if (component) { + ret.components_hash = + komihash(component->data(), component->size(), ret.components_hash); + } else { + ret.components_hash = komihash(NULL, 0, 
ret.components_hash); + } + return ret; +} + +} // namespace database diff --git a/src/tangara/database/index.hpp b/src/tangara/database/index.hpp new file mode 100644 index 00000000..45dae464 --- /dev/null +++ b/src/tangara/database/index.hpp @@ -0,0 +1,78 @@ +/* + * Copyright 2023 jacqueline <me@jacqueline.id.au> + * + * SPDX-License-Identifier: GPL-3.0-only + */ + +#pragma once + +#include <stdint.h> + +#include <cstdint> +#include <string> +#include <variant> +#include <vector> + +#include "collation.hpp" +#include "leveldb/db.h" +#include "leveldb/slice.h" + +#include "leveldb/write_batch.h" +#include "memory_resource.hpp" +#include "track.hpp" + +namespace database { + +typedef uint8_t IndexId; + +struct IndexInfo { + // Unique id for this index + IndexId id; + // Localised, user-friendly description of this index. e.g. "Albums by Artist" + // or "All Tracks". + std::pmr::string name; + // Specifier for how this index breaks down the database. + std::vector<Tag> components; +}; + +struct IndexKey { + struct Header { + // The index that this key was created for. + IndexId id; + // The number of components of IndexInfo that have already been filtered. + // For example, if an index consists of { kGenre, kArtist }, and this key + // represents an artist, then depth = 1. + std::uint8_t depth; + // The cumulative hash of all filtered components, in order. For example, if + // an index consists of { kArtist, kAlbum, kTitle }, and we are at depth = 2 + // then this may contain hash(hash("Jacqueline"), "My Cool Album"). + std::uint64_t components_hash; + + bool operator==(const Header&) const = default; + }; + Header header; + + // The filterable / selectable item that this key represents. "Jacqueline" for + // kArtist, "My Cool Album" for kAlbum, etc. + std::optional<std::pmr::string> item; + // If this is a leaf component, the track id for this record. 
+ // This could reasonably be the value for a record, but we keep it as a part + // of the key to help with disambiguation. + std::optional<TrackId> track; +}; + +auto Index(locale::ICollator&, const IndexInfo&, const Track&) + -> std::vector<std::pair<IndexKey, std::string>>; + +auto ExpandHeader(const IndexKey::Header&, + const std::optional<std::pmr::string>&) -> IndexKey::Header; + +// Predefined indexes +// TODO(jacqueline): Make these defined at runtime! :) + +extern const IndexInfo kAlbumsByArtist; +extern const IndexInfo kTracksByGenre; +extern const IndexInfo kAllTracks; +extern const IndexInfo kAllAlbums; + +} // namespace database diff --git a/src/tangara/database/records.cpp b/src/tangara/database/records.cpp new file mode 100644 index 00000000..b086be3b --- /dev/null +++ b/src/tangara/database/records.cpp @@ -0,0 +1,260 @@ +/* + * Copyright 2023 jacqueline <me@jacqueline.id.au> + * + * SPDX-License-Identifier: GPL-3.0-only + */ + +#include "records.hpp" + +#include <stdint.h> +#include <sys/_stdint.h> + +#include <functional> +#include <iomanip> +#include <iostream> +#include <memory_resource> +#include <sstream> +#include <string> +#include <vector> + +#include "cppbor.h" +#include "cppbor_parse.h" +#include "esp_log.h" + +#include "index.hpp" +#include "komihash.h" +#include "memory_resource.hpp" +#include "track.hpp" + +// As LevelDB is a key-value store, each record in the database consists of a +// key and an optional value. +// +// Values, when present, are always cbor-encoded. This is fast, compact, and +// very easy to evolve over time due to its inclusion of type information. +// +// Keys have a more complicated scheme, as for performance we rely heavily on +// LevelDB's sorted storage format. We must therefore worry about clustering of +// similar records, and the sortability of our encoding format. +// Each kind of key consists of a a single-byte prefix, then one or more +// fields separated by null (0) bytes. 
Each field may be cbor-encoded, or may +// use some bespoke encoding; it depends on whether we want to be able to sort +// by that field. +// For debugging and discussion purposes, we represent field separators +// textually as '/', and write each field as its hex encoding. e.g. a data key +// for the track with id 17 would be written as 'D / 0x11'. + +namespace database { + +[[maybe_unused]] static const char* kTag = "RECORDS"; + +static const char kPathPrefix = 'P'; +static const char kDataPrefix = 'D'; +static const char kHashPrefix = 'H'; +static const char kTagHashPrefix = 'T'; +static const char kIndexPrefix = 'I'; +static const char kFieldSeparator = '\0'; + +static constexpr auto makePrefix(char p) -> std::string { + std::string str; + str += p; + str += kFieldSeparator; + return str; +} + +auto EncodePathKey(std::string_view path) -> std::string { + std::stringstream out{}; + out << makePrefix(kPathPrefix); + out << path; + return out.str(); +} + +/* 'D/' */ +auto EncodeDataPrefix() -> std::string { + return makePrefix(kDataPrefix); +} + +/* 'D/ 0xACAB' */ +auto EncodeDataKey(const TrackId& id) -> std::string { + return EncodeDataPrefix() + TrackIdToBytes(id); +} + +auto EncodeDataValue(const TrackData& track) -> std::string { + auto* tag_hashes = new cppbor::Map{}; // Free'd by Array's dtor. 
+ for (const auto& entry : track.individual_tag_hashes) { + tag_hashes->add(cppbor::Uint{static_cast<uint32_t>(entry.first)}, + cppbor::Uint{entry.second}); + } + cppbor::Array val{ + cppbor::Uint{track.id}, + cppbor::Tstr{track.filepath}, + cppbor::Uint{track.tags_hash}, + cppbor::Bool{track.is_tombstoned}, + cppbor::Uint{track.modified_at.first}, + cppbor::Uint{track.modified_at.second}, + tag_hashes, + }; + return val.toString(); +} + +auto ParseDataValue(const leveldb::Slice& slice) -> std::shared_ptr<TrackData> { + auto [item, unused, err] = cppbor::parseWithViews( + reinterpret_cast<const uint8_t*>(slice.data()), slice.size()); + if (!item || item->type() != cppbor::ARRAY) { + return nullptr; + } + auto vals = item->asArray(); + if (vals->size() != 7 || vals->get(0)->type() != cppbor::UINT || + vals->get(1)->type() != cppbor::TSTR || + vals->get(2)->type() != cppbor::UINT || + vals->get(3)->type() != cppbor::SIMPLE || + vals->get(4)->type() != cppbor::UINT || + vals->get(5)->type() != cppbor::UINT || + vals->get(6)->type() != cppbor::MAP) { + return {}; + } + auto res = std::make_shared<TrackData>(); + res->id = vals->get(0)->asUint()->unsignedValue(); + res->filepath = vals->get(1)->asViewTstr()->view(); + res->tags_hash = vals->get(2)->asUint()->unsignedValue(); + res->is_tombstoned = vals->get(3)->asBool()->value(); + res->modified_at = std::make_pair<uint16_t, uint16_t>( + vals->get(4)->asUint()->unsignedValue(), + vals->get(5)->asUint()->unsignedValue()); + + auto tag_hashes = vals->get(6)->asMap(); + for (const auto& entry : *tag_hashes) { + auto tag = static_cast<Tag>(entry.first->asUint()->unsignedValue()); + res->individual_tag_hashes[tag] = entry.second->asUint()->unsignedValue(); + } + return res; +} + +/* 'H/ 0xBEEF' */ +auto EncodeHashKey(const uint64_t& hash) -> std::string { + return makePrefix(kHashPrefix) + cppbor::Uint{hash}.toString(); +} + +auto ParseHashValue(const leveldb::Slice& slice) -> std::optional<TrackId> { + return 
BytesToTrackId({slice.data(), slice.size()}); +} + +auto EncodeHashValue(TrackId id) -> std::string { + return TrackIdToBytes(id); +} + +/* 'T/ 0xBEEF' */ +auto EncodeTagHashKey(const uint64_t& hash) -> std::string { + return makePrefix(kTagHashPrefix) + cppbor::Uint{hash}.toString(); +} + +/* 'I/' */ +auto EncodeAllIndexesPrefix() -> std::string { + return makePrefix(kIndexPrefix); +} + +auto EncodeIndexPrefix(const IndexKey::Header& header) -> std::string { + std::ostringstream out; + out << makePrefix(kIndexPrefix); + cppbor::Array val{ + cppbor::Uint{header.id}, + cppbor::Uint{header.depth}, + cppbor::Uint{header.components_hash}, + }; + out << val.toString() << kFieldSeparator; + return out.str(); +} + +/* + * 'I/0xa2/0x686921/0xb9' + * ^ --- trailer + * ^ --- component ("hi!") + * ^ -------- header + * + * The components *must* be encoded in a way that is easy to sort + * lexicographically. The header and footer do not have this restriction, so + * cbor is fine. + * + * We store grouping information within the header; which index, filtered + * components. We store disambiguation information in the trailer; just a track + * id for now, but could reasonably be something like 'release year' as well. + */ +auto EncodeIndexKey(const IndexKey& key) -> std::string { + std::ostringstream out{}; + + out << EncodeIndexPrefix(key.header); + + // The component should already be UTF-8 encoded, so just write it. 
+ if (key.item) { + out << *key.item << kFieldSeparator; + } + + if (key.track) { + out << TrackIdToBytes(*key.track); + } + + return out.str(); +} + +auto ParseIndexKey(const leveldb::Slice& slice) -> std::optional<IndexKey> { + IndexKey result{}; + + auto prefix = EncodeAllIndexesPrefix(); + if (!slice.starts_with(prefix)) { + return {}; + } + + std::string key_data = slice.ToString().substr(prefix.size()); + auto [key, end_of_key, err] = cppbor::parseWithViews( + reinterpret_cast<const uint8_t*>(key_data.data()), key_data.size()); + if (!key || key->type() != cppbor::ARRAY) { + return {}; + } + auto as_array = key->asArray(); + if (as_array->size() != 3 || as_array->get(0)->type() != cppbor::UINT || + as_array->get(1)->type() != cppbor::UINT || + as_array->get(2)->type() != cppbor::UINT) { + return {}; + } + result.header.id = as_array->get(0)->asUint()->unsignedValue(); + result.header.depth = as_array->get(1)->asUint()->unsignedValue(); + result.header.components_hash = as_array->get(2)->asUint()->unsignedValue(); + + size_t header_length = + reinterpret_cast<const char*>(end_of_key) - key_data.data(); + + if (header_length == 0 || header_length >= key_data.size()) { + return {}; + } + + std::istringstream in(key_data.substr(header_length + 1)); + std::stringbuf buffer{}; + + in.get(buffer, kFieldSeparator); + if (buffer.str().size() > 0) { + result.item = buffer.str(); + } + + buffer = {}; + in.get(buffer); + std::string id_str = buffer.str(); + if (id_str.size() > 1) { + result.track = BytesToTrackId(id_str.substr(1)); + } + + return result; +} + +auto TrackIdToBytes(TrackId id) -> std::string { + return cppbor::Uint{id}.toString(); +} + +auto BytesToTrackId(std::span<const char> bytes) -> std::optional<TrackId> { + auto [res, unused, err] = cppbor::parse( + reinterpret_cast<const uint8_t*>(bytes.data()), bytes.size()); + if (!res || res->type() != cppbor::UINT) { + return {}; + } + return res->asUint()->unsignedValue(); +} + +} // namespace database diff 
--git a/src/tangara/database/records.hpp b/src/tangara/database/records.hpp new file mode 100644 index 00000000..3ca68fea --- /dev/null +++ b/src/tangara/database/records.hpp @@ -0,0 +1,85 @@ +/* + * Copyright 2023 jacqueline <me@jacqueline.id.au> + * + * SPDX-License-Identifier: GPL-3.0-only + */ + +#pragma once + +#include <stdint.h> + +#include <string> +#include <variant> +#include <vector> + +#include "leveldb/db.h" +#include "leveldb/slice.h" + +#include "index.hpp" +#include "memory_resource.hpp" +#include "track.hpp" + +namespace database { + +auto EncodePathKey(std::string_view path) -> std::string; + +/* + * Returns the prefix added to every TrackData key. This can be used to iterate + * over every data record in the database. + */ +auto EncodeDataPrefix() -> std::string; + +/* Encodes a data key for a track with the specified id. */ +auto EncodeDataKey(const TrackId& id) -> std::string; + +/* + * Encodes a TrackData instance into bytes, in preparation for storing it within + * the database. This encoding is consistent, and will remain stable over time. + */ +auto EncodeDataValue(const TrackData& track) -> std::string; + +/* + * Parses bytes previously encoded via EncodeDataValue back into a TrackData. + * May return nullopt if parsing fails. + */ +auto ParseDataValue(const leveldb::Slice& slice) -> std::shared_ptr<TrackData>; + +/* Encodes a hash key for the specified hash. */ +auto EncodeHashKey(const uint64_t& hash) -> std::string; + +/* + * Encodes a hash value (at this point just a track id) into bytes, in + * preparation for storing within the database. This encoding is consistent, and + * will remain stable over time. + */ +auto EncodeHashValue(TrackId id) -> std::string; + +/* Encodes a hash key for the specified hash. */ +auto EncodeTagHashKey(const uint64_t& hash) -> std::string; + +/* + * Parses bytes previously encoded via EncodeHashValue back into a track id. May + * return nullopt if parsing fails. 
+ */ +auto ParseHashValue(const leveldb::Slice&) -> std::optional<TrackId>; + +/* Encodes a prefix that matches all index keys, of all ids and depths. */ +auto EncodeAllIndexesPrefix() -> std::string; + +/* + */ +auto EncodeIndexPrefix(const IndexKey::Header&) -> std::string; + +auto EncodeIndexKey(const IndexKey&) -> std::string; +auto ParseIndexKey(const leveldb::Slice&) -> std::optional<IndexKey>; + +/* Encodes a TrackId as bytes. */ +auto TrackIdToBytes(TrackId id) -> std::string; + +/* + * Converts a track id encoded via TrackIdToBytes back into a TrackId. May + * return nullopt if parsing fails. + */ +auto BytesToTrackId(std::span<const char> bytes) -> std::optional<TrackId>; + +} // namespace database diff --git a/src/tangara/database/tag_parser.cpp b/src/tangara/database/tag_parser.cpp new file mode 100644 index 00000000..cbcbdcb5 --- /dev/null +++ b/src/tangara/database/tag_parser.cpp @@ -0,0 +1,208 @@ +/* + * Copyright 2023 jacqueline <me@jacqueline.id.au> + * + * SPDX-License-Identifier: GPL-3.0-only + */ + +#include "tag_parser.hpp" + +#include <cstdint> +#include <cstdlib> +#include <iomanip> +#include <mutex> + +#include "esp_log.h" +#include "ff.h" +#include "spi.hpp" +#include "tags.h" + +#include "memory_resource.hpp" + +namespace database { + +static auto convert_tag(int tag) -> std::optional<Tag> { + switch (tag) { + case Ttitle: + return Tag::kTitle; + case Tartist: + return Tag::kArtist; + case Talbumartist: + return Tag::kAlbumArtist; + case Talbum: + return Tag::kAlbum; + case Ttrack: + return Tag::kTrack; + case Tgenre: + return Tag::kGenres; + default: + return {}; + } +} + +namespace libtags { + +struct Aux { + FIL file; + FILINFO info; + TrackTags* tags; +}; + +static int read(Tagctx* ctx, void* buf, int cnt) { + Aux* aux = reinterpret_cast<Aux*>(ctx->aux); + if (f_eof(&aux->file)) { + return 0; + } + UINT bytes_read; + if (f_read(&aux->file, buf, cnt, &bytes_read) != FR_OK) { + return -1; + } + return bytes_read; +} + +static int 
seek(Tagctx* ctx, int offset, int whence) { + Aux* aux = reinterpret_cast<Aux*>(ctx->aux); + FRESULT res; + if (whence == 0) { + // Seek from the start of the file. This is f_lseek's behaviour. + res = f_lseek(&aux->file, offset); + } else if (whence == 1) { + // Seek from current offset. + res = f_lseek(&aux->file, aux->file.fptr + offset); + } else if (whence == 2) { + // Seek from the end of the file + res = f_lseek(&aux->file, aux->info.fsize + offset); + } else { + return -1; + } + if (res != FR_OK) { + return -1; + } + return aux->file.fptr; +} + +static void tag(Tagctx* ctx, + int t, + const char* k, + const char* v, + int offset, + int size, + Tagread f) { + Aux* aux = reinterpret_cast<Aux*>(ctx->aux); + auto tag = convert_tag(t); + if (!tag) { + return; + } + std::pmr::string value{v, &memory::kSpiRamResource}; + if (value.empty()) { + return; + } + aux->tags->set(*tag, value); +} + +static void toc(Tagctx* ctx, int ms, int offset) {} + +} // namespace libtags + +static const std::size_t kBufSize = 1024; +[[maybe_unused]] static const char* kTag = "TAGS"; + +TagParserImpl::TagParserImpl() {} + +auto TagParserImpl::ReadAndParseTags(std::string_view path) + -> std::shared_ptr<TrackTags> { + { + std::lock_guard<std::mutex> lock{cache_mutex_}; + std::optional<std::shared_ptr<TrackTags>> cached = + cache_.Get({path.data(), path.size()}); + if (cached) { + return *cached; + } + } + + std::shared_ptr<TrackTags> tags = parseNew(path); + if (!tags) { + return {}; + } + + // There wasn't a track number found in the track's tags. Try to synthesize + // one from the filename, which will sometimes have a track number at the + // start. 
+ if (!tags->track()) { + auto slash_pos = path.find_last_of("/"); + if (slash_pos != std::string::npos && path.size() - slash_pos > 1) { + auto trunc = path.substr(slash_pos + 1); + tags->track({trunc.data(), trunc.size()}); + } + } + + { + std::lock_guard<std::mutex> lock{cache_mutex_}; + cache_.Put({path.data(), path.size(), &memory::kSpiRamResource}, tags); + } + + return tags; +} + +auto TagParserImpl::parseNew(std::string_view p) -> std::shared_ptr<TrackTags> { + std::string path{p}; + libtags::Aux aux; + auto out = TrackTags::create(); + aux.tags = out.get(); + { + auto lock = drivers::acquire_spi(); + + if (f_stat(path.c_str(), &aux.info) != FR_OK || + f_open(&aux.file, path.c_str(), FA_READ) != FR_OK) { + ESP_LOGW(kTag, "failed to open file %s", path.c_str()); + return {}; + } + } + // Fine to have this on the stack; this is only called on tasks with large + // stacks anyway, due to all the string handling. + char buf[kBufSize]; + Tagctx ctx; + ctx.read = libtags::read; + ctx.seek = libtags::seek; + ctx.tag = libtags::tag; + ctx.toc = libtags::toc; + ctx.aux = &aux; + ctx.buf = buf; + ctx.bufsz = kBufSize; + + int res; + { + auto lock = drivers::acquire_spi(); + res = tagsget(&ctx); + f_close(&aux.file); + } + + if (res != 0) { + // Parsing failed. 
+ ESP_LOGE(kTag, "tag parsing for %s failed, reason %d", path.c_str(), res); + return {}; + } + + switch (ctx.format) { + case Fmp3: + out->encoding(Container::kMp3); + break; + case Fogg: + out->encoding(Container::kOgg); + break; + case Fflac: + out->encoding(Container::kFlac); + break; + case Fwav: + out->encoding(Container::kWav); + break; + case Fopus: + out->encoding(Container::kOpus); + break; + default: + out->encoding(Container::kUnsupported); + } + + return out; +} + +} // namespace database diff --git a/src/tangara/database/tag_parser.hpp b/src/tangara/database/tag_parser.hpp new file mode 100644 index 00000000..966258b5 --- /dev/null +++ b/src/tangara/database/tag_parser.hpp @@ -0,0 +1,44 @@ +/* + * Copyright 2023 jacqueline <me@jacqueline.id.au> + * + * SPDX-License-Identifier: GPL-3.0-only + */ + +#pragma once + +#include <string> + +#include "lru_cache.hpp" +#include "track.hpp" + +namespace database { + +class ITagParser { + public: + virtual ~ITagParser() {} + virtual auto ReadAndParseTags(std::string_view path) + -> std::shared_ptr<TrackTags> = 0; +}; + +class TagParserImpl : public ITagParser { + public: + TagParserImpl(); + auto ReadAndParseTags(std::string_view path) + -> std::shared_ptr<TrackTags> override; + + private: + auto parseNew(std::string_view path) -> std::shared_ptr<TrackTags>; + + /* + * Cache of tags that have already been extracted from files. Ideally this + * cache should be slightly larger than any page sizes in the UI. + */ + std::mutex cache_mutex_; + util::LruCache<8, std::pmr::string, std::shared_ptr<TrackTags>> cache_; + + // We could also consider keeping caches of artist name -> std::string and + // similar. This hasn't been done yet, as this isn't a common workload in + // any of our UI. 
+}; + +} // namespace database diff --git a/src/tangara/database/test/CMakeLists.txt b/src/tangara/database/test/CMakeLists.txt new file mode 100644 index 00000000..a9f2cedb --- /dev/null +++ b/src/tangara/database/test/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2023 jacqueline <me@jacqueline.id.au> +# +# SPDX-License-Identifier: GPL-3.0-only + +idf_component_register( + SRCS "test_records.cpp" "test_database.cpp" + INCLUDE_DIRS "." + REQUIRES catch2 cmock database drivers fixtures) diff --git a/src/tangara/database/test/test_database.cpp b/src/tangara/database/test/test_database.cpp new file mode 100644 index 00000000..6aec9bfb --- /dev/null +++ b/src/tangara/database/test/test_database.cpp @@ -0,0 +1,210 @@ +/* + * Copyright 2023 jacqueline <me@jacqueline.id.au> + * + * SPDX-License-Identifier: GPL-3.0-only + */ + +#include "database.hpp" + +#include <stdint.h> +#include <iomanip> +#include <map> +#include <memory> +#include <string> + +#include "catch2/catch.hpp" +#include "driver_cache.hpp" +#include "esp_log.h" +#include "file_gatherer.hpp" +#include "i2c_fixture.hpp" +#include "leveldb/db.h" +#include "spi_fixture.hpp" +#include "tag_parser.hpp" +#include "track.hpp" + +namespace database { + +class TestBackends : public IFileGatherer, public ITagParser { + public: + std::map<std::pmr::string, TrackTags> tracks; + + auto MakeTrack(const std::pmr::string& path, const std::pmr::string& title) + -> void { + TrackTags tags; + tags.encoding = Encoding::kMp3; + tags.title = title; + tracks[path] = tags; + } + + auto FindFiles(const std::pmr::string& root, + std::function<void(const std::pmr::string&)> cb) + -> void override { + for (auto keyval : tracks) { + std::invoke(cb, keyval.first); + } + } + + auto ReadAndParseTags(const std::pmr::string& path, TrackTags* out) + -> bool override { + if (tracks.contains(path)) { + *out = tracks.at(path); + return true; + } + return false; + } +}; + +TEST_CASE("track database", "[integration]") { + I2CFixture i2c; + 
SpiFixture spi; + drivers::DriverCache drivers; + auto storage = drivers.AcquireStorage(); + + Database::Destroy(); + + TestBackends tracks; + auto open_res = Database::Open(&tracks, &tracks); + REQUIRE(open_res.has_value()); + std::unique_ptr<Database> db(open_res.value()); + + SECTION("empty database") { + std::unique_ptr<Result<Track>> res(db->GetTracks(10).get()); + REQUIRE(res->values().size() == 0); + } + + SECTION("add new tracks") { + tracks.MakeTrack("track1.mp3", "Track 1"); + tracks.MakeTrack("track2.wav", "Track 2"); + tracks.MakeTrack("track3.exe", "Track 3"); + + db->Update(); + + std::unique_ptr<Result<Track>> res(db->GetTracks(10).get()); + REQUIRE(res->values().size() == 3); + CHECK(*res->values().at(0).tags().title == "Track 1"); + CHECK(res->values().at(0).data().id() == 1); + CHECK(*res->values().at(1).tags().title == "Track 2"); + CHECK(res->values().at(1).data().id() == 2); + CHECK(*res->values().at(2).tags().title == "Track 3"); + CHECK(res->values().at(2).data().id() == 3); + + SECTION("update with no filesystem changes") { + db->Update(); + + std::unique_ptr<Result<Track>> new_res(db->GetTracks(10).get()); + REQUIRE(new_res->values().size() == 3); + CHECK(res->values().at(0) == new_res->values().at(0)); + CHECK(res->values().at(1) == new_res->values().at(1)); + CHECK(res->values().at(2) == new_res->values().at(2)); + } + + SECTION("update with all tracks gone") { + tracks.tracks.clear(); + + db->Update(); + + std::unique_ptr<Result<Track>> new_res(db->GetTracks(10).get()); + CHECK(new_res->values().size() == 0); + + SECTION("update with one track returned") { + tracks.MakeTrack("track2.wav", "Track 2"); + + db->Update(); + + std::unique_ptr<Result<Track>> new_res(db->GetTracks(10).get()); + REQUIRE(new_res->values().size() == 1); + CHECK(res->values().at(1) == new_res->values().at(0)); + } + } + + SECTION("update with one track gone") { + tracks.tracks.erase("track2.wav"); + + db->Update(); + + std::unique_ptr<Result<Track>> 
new_res(db->GetTracks(10).get()); + REQUIRE(new_res->values().size() == 2); + CHECK(res->values().at(0) == new_res->values().at(0)); + CHECK(res->values().at(2) == new_res->values().at(1)); + } + + SECTION("update with tags changed") { + tracks.MakeTrack("track3.exe", "The Track 3"); + + db->Update(); + + std::unique_ptr<Result<Track>> new_res(db->GetTracks(10).get()); + REQUIRE(new_res->values().size() == 3); + CHECK(res->values().at(0) == new_res->values().at(0)); + CHECK(res->values().at(1) == new_res->values().at(1)); + CHECK(*new_res->values().at(2).tags().title == "The Track 3"); + // The id should not have changed, since this was just a tag update. + CHECK(res->values().at(2).data().id() == + new_res->values().at(2).data().id()); + } + + SECTION("update with one new track") { + tracks.MakeTrack("my track.midi", "Track 1 (nightcore remix)"); + + db->Update(); + + std::unique_ptr<Result<Track>> new_res(db->GetTracks(10).get()); + REQUIRE(new_res->values().size() == 4); + CHECK(res->values().at(0) == new_res->values().at(0)); + CHECK(res->values().at(1) == new_res->values().at(1)); + CHECK(res->values().at(2) == new_res->values().at(2)); + CHECK(*new_res->values().at(3).tags().title == + "Track 1 (nightcore remix)"); + CHECK(new_res->values().at(3).data().id() == 4); + } + + SECTION("get tracks with pagination") { + std::unique_ptr<Result<Track>> res(db->GetTracks(1).get()); + + REQUIRE(res->values().size() == 1); + CHECK(res->values().at(0).data().id() == 1); + REQUIRE(res->next_page()); + + res.reset(db->GetPage(&res->next_page().value()).get()); + + REQUIRE(res->values().size() == 1); + CHECK(res->values().at(0).data().id() == 2); + REQUIRE(res->next_page()); + + res.reset(db->GetPage(&res->next_page().value()).get()); + + REQUIRE(res->values().size() == 1); + CHECK(res->values().at(0).data().id() == 3); + REQUIRE(!res->next_page()); + + SECTION("page backwards") { + REQUIRE(res->prev_page()); + + res.reset(db->GetPage(&res->prev_page().value()).get()); + + 
REQUIRE(res->values().size() == 1); + CHECK(res->values().at(0).data().id() == 2); + REQUIRE(res->prev_page()); + + res.reset(db->GetPage(&res->prev_page().value()).get()); + + REQUIRE(res->values().size() == 1); + CHECK(res->values().at(0).data().id() == 1); + REQUIRE(!res->prev_page()); + + SECTION("page forwards again") { + REQUIRE(res->next_page()); + + res.reset(db->GetPage(&res->next_page().value()).get()); + + REQUIRE(res->values().size() == 1); + CHECK(res->values().at(0).data().id() == 2); + CHECK(res->next_page()); + CHECK(res->prev_page()); + } + } + } + } +} + +} // namespace database diff --git a/src/tangara/database/test/test_records.cpp b/src/tangara/database/test/test_records.cpp new file mode 100644 index 00000000..2f59489c --- /dev/null +++ b/src/tangara/database/test/test_records.cpp @@ -0,0 +1,146 @@ +/* + * Copyright 2023 jacqueline <me@jacqueline.id.au> + * + * SPDX-License-Identifier: GPL-3.0-only + */ + +#include "records.hpp" + +#include <stdint.h> +#include <iomanip> +#include <string> + +#include "catch2/catch.hpp" + +std::pmr::string ToHex(const std::pmr::string& s) { + std::ostringstream ret; + + for (std::pmr::string::size_type i = 0; i < s.length(); ++i) + ret << std::hex << std::setfill('0') << std::setw(2) << std::uppercase + << (int)s[i]; + + return ret.str(); +} + +namespace database { + +TEST_CASE("database record encoding", "[unit]") { + SECTION("track id to bytes") { + TrackId id = 1234678; + OwningSlice as_bytes = TrackIdToBytes(id); + + SECTION("encodes correctly") { + // Purposefully a brittle test, since we need to be very careful about + // changing the way records are encoded. + REQUIRE(as_bytes.data.size() == 5); + // unsigned value + CHECK(as_bytes.data[0] == 0x1A); + // TODO(jacqueline): what's up with these failing? 
+ // 12345678 + // CHECK(as_bytes.data[1] == 0x00); + // CHECK(as_bytes.data[2] == 0x01); + // CHECK(as_bytes.data[3] == 0xE2); + // CHECK(as_bytes.data[4] == 0x40); + } + + SECTION("round-trips") { + CHECK(*BytesToTrackId(as_bytes.data) == id); + } + + SECTION("encodes compactly") { + OwningSlice small_id = TrackIdToBytes(1); + OwningSlice large_id = TrackIdToBytes(999999); + + CHECK(small_id.data.size() < large_id.data.size()); + } + + SECTION("decoding rejects garbage") { + std::optional<TrackId> res = BytesToTrackId("i'm gay"); + + CHECK(res.has_value() == false); + } + } + + SECTION("data keys") { + OwningSlice key = CreateDataKey(123456); + + REQUIRE(key.data.size() == 7); + CHECK(key.data[0] == 'D'); + CHECK(key.data[1] == '\0'); + // unsigned int + CHECK(key.data[2] == 0x1A); + // assume the int encoding is fine. + } + + SECTION("data values") { + TrackData data(123, "/some/path.mp3", 0xACAB, 69, true); + + OwningSlice enc = CreateDataValue(data); + + SECTION("encodes correctly") { + REQUIRE(enc.data.size() == 24); + + // Array, length 5 + CHECK(enc.data[0] == 0x85); + + // unsigned int, value 123 + CHECK(enc.data[1] == 0x18); + CHECK(enc.data[2] == 0x7B); + + // text, 14 chars + CHECK(enc.data[3] == 0x6E); + // ... assume the text looks okay. 
+ + // unsigned int, value 44203 + CHECK(enc.data[18] == 0x19); + CHECK(enc.data[19] == 0xAC); + CHECK(enc.data[20] == 0xAB); + + // unsigned int, value 69 + CHECK(enc.data[21] == 0x18); + CHECK(enc.data[22] == 0x45); + + // primitive 21, true + CHECK(enc.data[23] == 0xF5); + } + + SECTION("round-trips") { + CHECK(ParseDataValue(enc.slice) == data); + } + + SECTION("decoding rejects garbage") { + std::optional<TrackData> res = ParseDataValue("hi!"); + + CHECK(res.has_value() == false); + } + } + + SECTION("hash keys") { + OwningSlice key = CreateHashKey(123456); + + REQUIRE(key.data.size() == 7); + CHECK(key.data[0] == 'H'); + CHECK(key.data[1] == '\0'); + // unsigned int + CHECK(key.data[2] == 0x1A); + // assume the int encoding is fine. + } + + SECTION("hash values") { + OwningSlice val = CreateHashValue(123456); + + CHECK(val.data == TrackIdToBytes(123456).data); + + SECTION("round-trips") { + CHECK(ParseHashValue(val.slice) == 123456); + } + + SECTION("decoding rejects garbage") { + std::optional<TrackId> res = ParseHashValue("the first track :)"); + + CHECK(res.has_value() == false); + } + } +} + +} // namespace database diff --git a/src/tangara/database/track.cpp b/src/tangara/database/track.cpp new file mode 100644 index 00000000..1b1442a1 --- /dev/null +++ b/src/tangara/database/track.cpp @@ -0,0 +1,307 @@ +/* + * Copyright 2023 jacqueline <me@jacqueline.id.au> + * + * SPDX-License-Identifier: GPL-3.0-only + */ + +#include "track.hpp" + +#include <iomanip> +#include <iostream> +#include <memory_resource> +#include <span> +#include <sstream> +#include <string> + +#include "esp_log.h" +#include "komihash.h" + +#include "memory_resource.hpp" + +namespace database { + +static constexpr char kGenreDelimiters[] = ",;"; + +auto tagName(Tag t) -> std::string { + switch (t) { + case Tag::kTitle: + return "title"; + case Tag::kArtist: + return "artist"; + case Tag::kAlbum: + return "album"; + case Tag::kAlbumArtist: + return "album_artist"; + case Tag::kDisc: + 
return "disc"; + case Tag::kTrack: + return "track"; + case Tag::kAlbumOrder: + return "album_order"; + case Tag::kGenres: + return "genre"; + } + return ""; +} + +auto tagHash(const TagValue& t) -> uint64_t { + return std::visit( + [&](auto&& arg) { + using T = std::decay_t<decltype(arg)>; + if constexpr (std::is_same_v<T, std::monostate>) { + return static_cast<uint64_t>(0); + } else if constexpr (std::is_same_v<T, std::pmr::string>) { + return komihash(arg.data(), arg.size(), 0); + } else if constexpr (std::is_same_v<T, uint32_t>) { + return komihash(&arg, sizeof(arg), 0); + } else if constexpr (std::is_same_v< + T, std::span<const std::pmr::string>>) { + komihash_stream_t hash; + komihash_stream_init(&hash, 0); + for (const auto& i : arg) { + komihash_stream_update(&hash, i.data(), i.size()); + } + return komihash_stream_final(&hash); + } + }, + t); + return 0; +} + +auto tagToString(const TagValue& val) -> std::string { + return std::visit( + [&](auto&& arg) -> std::string { + using T = std::decay_t<decltype(arg)>; + if constexpr (std::is_same_v<T, std::monostate>) { + return ""; + } else if constexpr (std::is_same_v<T, std::pmr::string>) { + return {arg.data(), arg.size()}; + } else if constexpr (std::is_same_v<T, uint32_t>) { + return std::to_string(arg); + } else if constexpr (std::is_same_v< + T, std::span<const std::pmr::string>>) { + std::ostringstream builder{}; + for (const auto& str : arg) { + builder << std::string{str.data(), str.size()} << ","; + } + return builder.str(); + } + }, + val); + return ""; +} + +auto TrackTags::create() -> std::shared_ptr<TrackTags> { + return std::allocate_shared<TrackTags, + std::pmr::polymorphic_allocator<TrackTags>>( + &memory::kSpiRamResource); +} + +template <typename T> +auto valueOrMonostate(std::optional<T> t) -> TagValue { + if (t) { + return *t; + } + return std::monostate{}; +} + +auto TrackTags::get(Tag t) const -> TagValue { + switch (t) { + case Tag::kTitle: + return valueOrMonostate(title_); + case 
Tag::kArtist: + return valueOrMonostate(artist_); + case Tag::kAlbum: + return valueOrMonostate(album_); + case Tag::kAlbumArtist: + return valueOrMonostate(album_artist_); + case Tag::kDisc: + return valueOrMonostate(disc_); + case Tag::kTrack: + return valueOrMonostate(track_); + case Tag::kAlbumOrder: + return albumOrder(); + case Tag::kGenres: + return genres_; + } + return std::monostate{}; +} + +auto TrackTags::set(Tag t, std::string_view v) -> void { + switch (t) { + case Tag::kTitle: + title(v); + break; + case Tag::kArtist: + artist(v); + break; + case Tag::kAlbum: + album(v); + break; + case Tag::kAlbumArtist: + albumArtist(v); + break; + case Tag::kDisc: + disc(v); + break; + case Tag::kTrack: + track(v); + break; + case Tag::kAlbumOrder: + // This tag is derices from disc and track, and so it can't be set. + break; + case Tag::kGenres: + genres(v); + break; + } +} + +auto TrackTags::allPresent() const -> std::vector<Tag> { + std::vector<Tag> out; + auto add_if_present = [&](Tag t, auto opt) { + if (opt) { + out.push_back(t); + } + }; + add_if_present(Tag::kTitle, title_); + add_if_present(Tag::kArtist, artist_); + add_if_present(Tag::kAlbum, album_); + add_if_present(Tag::kAlbumArtist, album_artist_); + add_if_present(Tag::kDisc, disc_); + add_if_present(Tag::kTrack, track_); + add_if_present(Tag::kGenres, !genres_.empty()); + return out; +} + +auto TrackTags::title() const -> const std::optional<std::pmr::string>& { + return title_; +} + +auto TrackTags::title(std::string_view s) -> void { + title_ = s; +} + +auto TrackTags::artist() const -> const std::optional<std::pmr::string>& { + return artist_; +} + +auto TrackTags::artist(std::string_view s) -> void { + artist_ = s; +} + +auto TrackTags::album() const -> const std::optional<std::pmr::string>& { + return album_; +} + +auto TrackTags::album(std::string_view s) -> void { + album_ = s; +} + +auto TrackTags::albumArtist() const -> const std::optional<std::pmr::string>& { + return album_artist_; +} + 
+auto TrackTags::albumArtist(std::string_view s) -> void { + album_artist_ = s; +} + +auto TrackTags::disc() const -> const std::optional<uint8_t>& { + return disc_; +} + +auto TrackTags::disc(const std::string_view s) -> void { + disc_ = std::strtol(s.data(), nullptr, 10); +} + +auto TrackTags::track() const -> const std::optional<uint16_t>& { + return track_; +} + +auto TrackTags::track(const std::string_view s) -> void { + track_ = std::strtol(s.data(), nullptr, 10); +} + +auto TrackTags::albumOrder() const -> uint32_t { + return (disc_.value_or(0) << 16) | track_.value_or(0); +} + +auto TrackTags::genres() const -> std::span<const std::pmr::string> { + return genres_; +} + +auto TrackTags::genres(const std::string_view s) -> void { + genres_.clear(); + std::string src = {s.data(), s.size()}; + char* token = std::strtok(src.data(), kGenreDelimiters); + + auto trim_and_add = [this](std::string_view s) { + std::string copy = {s.data(), s.size()}; + + // Trim the left + copy.erase(copy.begin(), + std::find_if(copy.begin(), copy.end(), [](unsigned char ch) { + return !std::isspace(ch); + })); + + // Trim the right + copy.erase(std::find_if(copy.rbegin(), copy.rend(), + [](unsigned char ch) { return !std::isspace(ch); }) + .base(), + copy.end()); + + // Ignore empty strings. + if (!copy.empty()) { + genres_.push_back({copy.data(), copy.size()}); + } + }; + + if (token == NULL) { + // No delimiters found in the input. Treat this as a single genre. + trim_and_add(s); + } else { + while (token != NULL) { + // Add tokens until no more delimiters found. + trim_and_add(token); + token = std::strtok(NULL, kGenreDelimiters); + } + } +} + +/* + * Uses a komihash stream to incrementally hash tags. This lowers the + * function's memory footprint a little so that it's safe to call from any + * stack. + */ +auto TrackTags::Hash() const -> uint64_t { + // TODO(jacqueline): this function doesn't work very well for tracks with no + // tags at all. 
+ komihash_stream_t stream; + komihash_stream_init(&stream, 0); + + auto add = [&](const uint64_t& h) { + komihash_stream_update(&stream, &h, sizeof(h)); + }; + + add(tagHash(get(Tag::kTitle))); + add(tagHash(get(Tag::kArtist))); + add(tagHash(get(Tag::kAlbum))); + add(tagHash(get(Tag::kAlbumArtist))); + + // TODO: Should we be including this? + add(tagHash(get(Tag::kAlbumOrder))); + + return komihash_stream_final(&stream); +} + +auto Track::TitleOrFilename() const -> std::pmr::string { + auto title = tags().title(); + if (title) { + return *title; + } + auto start = data().filepath.find_last_of('/'); + if (start == std::pmr::string::npos) { + return data().filepath; + } + return data().filepath.substr(start + 1); +} +} // namespace database diff --git a/src/tangara/database/track.hpp b/src/tangara/database/track.hpp new file mode 100644 index 00000000..b097ab52 --- /dev/null +++ b/src/tangara/database/track.hpp @@ -0,0 +1,205 @@ +/* + * Copyright 2023 jacqueline <me@jacqueline.id.au> + * + * SPDX-License-Identifier: GPL-3.0-only + */ + +#pragma once + +#include <cstdint> + +#include <map> +#include <memory> +#include <optional> +#include <span> +#include <string> +#include <unordered_map> +#include <utility> +#include <variant> + +#include "leveldb/db.h" +#include "memory_resource.hpp" + +namespace database { + +/* + * Uniquely describes a single track within the database. This value will be + * consistent across database updates, and should ideally (but is not guaranteed + * to) endure even across a track being removed and re-added. + * + * Four billion tracks should be enough for anybody. + */ +typedef uint32_t TrackId; + +/* + * Audio file encodings that we are aware of. Used to select an appropriate + * decoder at play time. + * + * Values of this enum are persisted in this database, so it is probably never a + * good idea to change the int representation of an existing value. 
+ */
+enum class Container {
+  kUnsupported = 0,
+  kMp3 = 1,
+  kWav = 2,
+  kOgg = 3,
+  kFlac = 4,
+  kOpus = 5,
+};
+
+/*
+ * The individual tag fields that the database understands.
+ * NOTE(review): TrackData keys its per-tag hash map by Tag, so these values
+ * presumably end up persisted too — avoid renumbering existing entries;
+ * confirm against the records serialization code.
+ */
+enum class Tag {
+  kTitle = 0,
+  kArtist = 1,
+  kAlbum = 2,
+  kAlbumArtist = 3,
+  kDisc = 4,
+  kTrack = 5,
+  kAlbumOrder = 6,
+  kGenres = 7,
+};
+
+/*
+ * The value of a single tag: monostate when the tag is absent, a string, a
+ * 32-bit number, or (for genres) a non-owning view over a list of strings.
+ */
+using TagValue = std::variant<std::monostate,
+                              std::pmr::string,
+                              uint32_t,
+                              std::span<const std::pmr::string>>;
+
+auto tagName(Tag) -> std::string;
+auto tagHash(const TagValue&) -> uint64_t;
+auto tagToString(const TagValue&) -> std::string;
+
+/*
+ * Owning container for tag-related track metadata that was extracted from a
+ * file.
+ */
+class TrackTags {
+ public:
+  static auto create() -> std::shared_ptr<TrackTags>;
+
+  // Genre strings are allocated from SPI RAM; see memory_resource.hpp.
+  TrackTags()
+      : encoding_(Container::kUnsupported), genres_(&memory::kSpiRamResource) {}
+
+  // Non-copyable; instances are shared via shared_ptr (see create()).
+  TrackTags(const TrackTags& other) = delete;
+  TrackTags& operator=(TrackTags& other) = delete;
+
+  // Memberwise equality over the encoding and every tag field.
+  bool operator==(const TrackTags&) const = default;
+
+  // Generic, Tag-keyed accessors over all of the typed fields below.
+  auto get(Tag) const -> TagValue;
+  auto set(Tag, std::string_view) -> void;
+
+  // Lists every tag that currently has a value.
+  auto allPresent() const -> std::vector<Tag>;
+
+  auto encoding() const -> Container { return encoding_; };
+  auto encoding(Container e) -> void { encoding_ = e; };
+
+  // Typed getter/setter pairs for each individual tag. The setters take the
+  // tag's raw string form and parse where needed.
+  auto title() const -> const std::optional<std::pmr::string>&;
+  auto title(std::string_view) -> void;
+
+  auto artist() const -> const std::optional<std::pmr::string>&;
+  auto artist(std::string_view) -> void;
+
+  auto album() const -> const std::optional<std::pmr::string>&;
+  auto album(std::string_view) -> void;
+
+  auto albumArtist() const -> const std::optional<std::pmr::string>&;
+  auto albumArtist(std::string_view) -> void;
+
+  auto disc() const -> const std::optional<uint8_t>&;
+  auto disc(const std::string_view) -> void;
+
+  auto track() const -> const std::optional<uint16_t>&;
+  auto track(const std::string_view) -> void;
+
+  // Disc and track combined into a single sortable 32-bit key.
+  auto albumOrder() const -> uint32_t;
+
+  auto genres() const -> std::span<const std::pmr::string>;
+  auto genres(const std::string_view) -> void;
+
+  /*
+   * Returns a hash of the 'identifying' tags of this track. That is, a hash
+   * that can be used to determine if one track is likely the same as another,
+   * across things like re-encoding, re-mastering, or moving the underlying
+   * file.
+   */
+  auto Hash() const -> uint64_t;
+
+ private:
+  Container encoding_;
+
+  std::optional<std::pmr::string> title_;
+  std::optional<std::pmr::string> artist_;
+  std::optional<std::pmr::string> album_;
+  std::optional<std::pmr::string> album_artist_;
+  std::optional<uint8_t> disc_;
+  std::optional<uint16_t> track_;
+  std::pmr::vector<std::pmr::string> genres_;
+};
+
+/*
+ * Owning container for all of the metadata we store for a particular track.
+ * This includes two main kinds of metadata:
+ * 1. static(ish) attributes, such as the id, path on disk, hash of the tags
+ * 2. dynamic attributes, such as the number of times this track has been
+ * played.
+ *
+ * Because a TrackData is immutable, it is thread safe but will not reflect any
+ * changes to the dynamic attributes that may happen after it was obtained.
+ *
+ * Tracks may be 'tombstoned'; this indicates that the track is no longer
+ * present at its previous location on disk, and we do not have any existing
+ * files with a matching tags_hash. When this is the case, we ignore this
+ * TrackData for most purposes. We keep the entry in our database so that we can
+ * properly restore dynamic attributes (such as play count) if the track later
+ * re-appears on disk.
+ */
+struct TrackData {
+ public:
+  TrackData()
+      : id(0),
+        filepath(),
+        tags_hash(0),
+        individual_tag_hashes(&memory::kSpiRamResource),
+        is_tombstoned(false),
+        modified_at() {}
+
+  TrackId id;
+  std::pmr::string filepath;
+  // Identity hash of the track's tags; see TrackTags::Hash().
+  uint64_t tags_hash;
+  // Per-tag hash values, keyed by Tag; stored in SPI RAM.
+  std::pmr::unordered_map<Tag, uint64_t> individual_tag_hashes;
+  // See the class comment above: set when the file is gone from disk but we
+  // keep the entry for its dynamic attributes.
+  bool is_tombstoned;
+  // NOTE(review): presumably the filesystem's packed (date, time) pair for
+  // the file's last modification — confirm against the code that fills it.
+  std::pair<uint16_t, uint16_t> modified_at;
+
+  // NOTE(review): a user-declared (even deleted) move constructor also
+  // suppresses the implicit copy operations, so TrackData is neither
+  // copyable nor movable; instances travel via shared_ptr.
+  TrackData(TrackData&& other) = delete;
+  TrackData& operator=(TrackData& other) = delete;
+
+  // Memberwise equality across every field.
+  bool operator==(const TrackData&) const = default;
+};
+
+/*
+ * Immutable and owning combination of a track's tags and metadata.
+ *
+ * Note that instances of this class may have a fairly large memory impact, due
+ * to the large number of strings they own. Prefer to query the database again
+ * (which has its own caching layer), rather than retaining Track instances for
+ * a long time.
+ */
+class Track {
+ public:
+  Track(std::shared_ptr<TrackData>& data, std::shared_ptr<TrackTags> tags)
+      : data_(data), tags_(tags) {}
+
+  Track(Track& other) = delete;
+  Track& operator=(Track& other) = delete;
+
+  // NOTE(review): this defaulted comparison compares the shared_ptr members,
+  // so two Tracks are equal only when they share the exact same TrackData and
+  // TrackTags objects — pointer identity, not value equality of the contents.
+  // Confirm callers want identity semantics here.
+  bool operator==(const Track&) const = default;
+
+  auto data() const -> const TrackData& { return *data_; }
+  auto tags() const -> const TrackTags& { return *tags_; }
+
+  // The title tag, or the file's basename when no title is available.
+  auto TitleOrFilename() const -> std::pmr::string;
+
+ private:
+  std::shared_ptr<const TrackData> data_;
+  std::shared_ptr<TrackTags> tags_;
+};
+
+}  // namespace database
