summaryrefslogtreecommitdiff
path: root/src/database
diff options
context:
space:
mode:
Diffstat (limited to 'src/database')
-rw-r--r--src/database/CMakeLists.txt4
-rw-r--r--src/database/database.cpp249
-rw-r--r--src/database/include/database.hpp26
-rw-r--r--src/database/include/index.hpp72
-rw-r--r--src/database/include/records.hpp32
-rw-r--r--src/database/include/track.hpp37
-rw-r--r--src/database/index.cpp88
-rw-r--r--src/database/records.cpp211
-rw-r--r--src/database/tag_parser.cpp36
-rw-r--r--src/database/track.cpp39
10 files changed, 687 insertions, 107 deletions
diff --git a/src/database/CMakeLists.txt b/src/database/CMakeLists.txt
index e7b1f62c..04e1d5d8 100644
--- a/src/database/CMakeLists.txt
+++ b/src/database/CMakeLists.txt
@@ -3,9 +3,9 @@
# SPDX-License-Identifier: GPL-3.0-only
idf_component_register(
- SRCS "env_esp.cpp" "database.cpp" "track.cpp" "records.cpp" "file_gatherer.cpp" "tag_parser.cpp"
+ SRCS "env_esp.cpp" "database.cpp" "track.cpp" "records.cpp" "file_gatherer.cpp" "tag_parser.cpp" "index.cpp"
INCLUDE_DIRS "include"
- REQUIRES "result" "span" "esp_psram" "fatfs" "libtags" "komihash" "cbor" "tasks")
+ REQUIRES "result" "span" "esp_psram" "fatfs" "libtags" "komihash" "cbor" "tasks" "shared_string")
target_compile_options(${COMPONENT_LIB} PRIVATE ${EXTRA_WARNINGS})
diff --git a/src/database/database.cpp b/src/database/database.cpp
index ac5e4873..1ac5d729 100644
--- a/src/database/database.cpp
+++ b/src/database/database.cpp
@@ -13,11 +13,13 @@
#include <functional>
#include <iomanip>
#include <memory>
+#include <optional>
#include <sstream>
#include "esp_log.h"
#include "ff.h"
#include "freertos/projdefs.h"
+#include "index.hpp"
#include "leveldb/cache.h"
#include "leveldb/db.h"
#include "leveldb/iterator.h"
@@ -130,72 +132,91 @@ Database::~Database() {
auto Database::Update() -> std::future<void> {
return worker_task_->Dispatch<void>([&]() -> void {
- // Stage 1: verify all existing tracks are still valid.
- ESP_LOGI(kTag, "verifying existing tracks");
- const leveldb::Snapshot* snapshot = db_->GetSnapshot();
leveldb::ReadOptions read_options;
read_options.fill_cache = false;
- read_options.snapshot = snapshot;
- leveldb::Iterator* it = db_->NewIterator(read_options);
- OwningSlice prefix = CreateDataPrefix();
- it->Seek(prefix.slice);
- while (it->Valid() && it->key().starts_with(prefix.slice)) {
- std::optional<TrackData> track = ParseDataValue(it->value());
- if (!track) {
- // The value was malformed. Drop this record.
- ESP_LOGW(kTag, "dropping malformed metadata");
+
+ // Stage 0: discard indexes
+ // TODO(jacqueline): I think it should be possible to incrementally update
+ // indexes, but my brain hurts.
+ ESP_LOGI(kTag, "dropping stale indexes");
+ {
+ leveldb::Iterator* it = db_->NewIterator(read_options);
+ OwningSlice prefix = EncodeAllIndexesPrefix();
+ it->Seek(prefix.slice);
+ while (it->Valid() && it->key().starts_with(prefix.slice)) {
db_->Delete(leveldb::WriteOptions(), it->key());
it->Next();
- continue;
}
+ }
- if (track->is_tombstoned()) {
- ESP_LOGW(kTag, "skipping tombstoned %lx", track->id());
- it->Next();
- continue;
- }
+ // Stage 1: verify all existing tracks are still valid.
+ ESP_LOGI(kTag, "verifying existing tracks");
+ {
+ leveldb::Iterator* it = db_->NewIterator(read_options);
+ OwningSlice prefix = EncodeDataPrefix();
+ it->Seek(prefix.slice);
+ while (it->Valid() && it->key().starts_with(prefix.slice)) {
+ std::optional<TrackData> track = ParseDataValue(it->value());
+ if (!track) {
+ // The value was malformed. Drop this record.
+ ESP_LOGW(kTag, "dropping malformed metadata");
+ db_->Delete(leveldb::WriteOptions(), it->key());
+ it->Next();
+ continue;
+ }
- TrackTags tags;
- if (!tag_parser_->ReadAndParseTags(track->filepath(), &tags) ||
- tags.encoding == Encoding::kUnsupported) {
- // We couldn't read the tags for this track. Either they were
- // malformed, or perhaps the file is missing. Either way, tombstone
- // this record.
- ESP_LOGW(kTag, "entombing missing #%lx", track->id());
- dbPutTrackData(track->Entomb());
- it->Next();
- continue;
- }
+ if (track->is_tombstoned()) {
+ ESP_LOGW(kTag, "skipping tombstoned %lx", track->id());
+ it->Next();
+ continue;
+ }
- uint64_t new_hash = tags.Hash();
- if (new_hash != track->tags_hash()) {
- // This track's tags have changed. Since the filepath is exactly the
- // same, we assume this is a legitimate correction. Update the
- // database.
- ESP_LOGI(kTag, "updating hash (%llx -> %llx)", track->tags_hash(),
- new_hash);
- dbPutTrackData(track->UpdateHash(new_hash));
- dbPutHash(new_hash, track->id());
- }
+ TrackTags tags{};
+ if (!tag_parser_->ReadAndParseTags(track->filepath(), &tags) ||
+ tags.encoding() == Encoding::kUnsupported) {
+ // We couldn't read the tags for this track. Either they were
+ // malformed, or perhaps the file is missing. Either way, tombstone
+ // this record.
+ ESP_LOGW(kTag, "entombing missing #%lx", track->id());
+ dbPutTrackData(track->Entomb());
+ it->Next();
+ continue;
+ }
- it->Next();
+ // At this point, we know that the track still exists in its original
+ // location. All that's left to do is update any metadata about it.
+
+ uint64_t new_hash = tags.Hash();
+ if (new_hash != track->tags_hash()) {
+ // This track's tags have changed. Since the filepath is exactly the
+ // same, we assume this is a legitimate correction. Update the
+ // database.
+ ESP_LOGI(kTag, "updating hash (%llx -> %llx)", track->tags_hash(),
+ new_hash);
+ dbPutTrackData(track->UpdateHash(new_hash));
+ dbPutHash(new_hash, track->id());
+ }
+
+ dbCreateIndexesForTrack({*track, tags});
+
+ it->Next();
+ }
+ delete it;
}
- delete it;
- db_->ReleaseSnapshot(snapshot);
// Stage 2: search for newly added files.
ESP_LOGI(kTag, "scanning for new tracks");
file_gatherer_->FindFiles("", [&](const std::string& path) {
TrackTags tags;
if (!tag_parser_->ReadAndParseTags(path, &tags) ||
- tags.encoding == Encoding::kUnsupported) {
+ tags.encoding() == Encoding::kUnsupported) {
// No parseable tags; skip this fiile.
return;
}
// Check for any existing record with the same hash.
uint64_t hash = tags.Hash();
- OwningSlice key = CreateHashKey(hash);
+ OwningSlice key = EncodeHashKey(hash);
std::optional<TrackId> existing_hash;
std::string raw_entry;
if (db_->Get(leveldb::ReadOptions(), key.slice, &raw_entry).ok()) {
@@ -207,7 +228,11 @@ auto Database::Update() -> std::future<void> {
// malformed. Either way, record this as a new track.
TrackId id = dbMintNewTrackId();
ESP_LOGI(kTag, "recording new 0x%lx", id);
- dbPutTrack(id, path, hash);
+
+ TrackData data(id, path, hash);
+ dbPutTrackData(data);
+ dbPutHash(hash, id);
+ dbCreateIndexesForTrack({data, tags});
return;
}
@@ -216,12 +241,14 @@ auto Database::Update() -> std::future<void> {
// We found a hash that matches, but there's no data record? Weird.
TrackData new_data(*existing_hash, path, hash);
dbPutTrackData(new_data);
+ dbCreateIndexesForTrack({*existing_data, tags});
return;
}
if (existing_data->is_tombstoned()) {
ESP_LOGI(kTag, "exhuming track %lu", existing_data->id());
dbPutTrackData(existing_data->Exhume(path));
+ dbCreateIndexesForTrack({*existing_data, tags});
} else if (existing_data->filepath() != path) {
ESP_LOGW(kTag, "tag hash collision");
}
@@ -241,11 +268,41 @@ auto Database::GetTrackPath(TrackId id)
});
}
+auto Database::GetIndexes() -> std::vector<IndexInfo> {
+ // TODO(jacqueline): This probably needs to be async? When we have runtime
+ // configurable indexes, they will need to come from somewhere.
+ return {
+ kAllTracks,
+ kAlbumsByArtist,
+ kTracksByGenre,
+ };
+}
+
+auto Database::GetTracksByIndex(const IndexInfo& index, std::size_t page_size)
+ -> std::future<Result<IndexRecord>*> {
+ return worker_task_->Dispatch<Result<IndexRecord>*>(
+ [=, this]() -> Result<IndexRecord>* {
+ IndexKey::Header header{
+ .id = index.id,
+ .depth = 0,
+ .components_hash = 0,
+ };
+ OwningSlice prefix = EncodeIndexPrefix(header);
+ Continuation<IndexRecord> c{.iterator = nullptr,
+ .prefix = prefix.data,
+ .start_key = prefix.data,
+ .forward = true,
+ .was_prev_forward = true,
+ .page_size = page_size};
+ return dbGetPage(c);
+ });
+}
+
auto Database::GetTracks(std::size_t page_size) -> std::future<Result<Track>*> {
return worker_task_->Dispatch<Result<Track>*>([=, this]() -> Result<Track>* {
Continuation<Track> c{.iterator = nullptr,
- .prefix = CreateDataPrefix().data,
- .start_key = CreateDataPrefix().data,
+ .prefix = EncodeDataPrefix().data,
+ .start_key = EncodeDataPrefix().data,
.forward = true,
.was_prev_forward = true,
.page_size = page_size};
@@ -276,6 +333,8 @@ auto Database::GetPage(Continuation<T>* c) -> std::future<Result<T>*> {
template auto Database::GetPage<Track>(Continuation<Track>* c)
-> std::future<Result<Track>*>;
+template auto Database::GetPage<IndexRecord>(Continuation<IndexRecord>* c)
+ -> std::future<Result<IndexRecord>*>;
template auto Database::GetPage<std::string>(Continuation<std::string>* c)
-> std::future<Result<std::string>*>;
@@ -300,23 +359,23 @@ auto Database::dbMintNewTrackId() -> TrackId {
}
auto Database::dbEntomb(TrackId id, uint64_t hash) -> void {
- OwningSlice key = CreateHashKey(hash);
- OwningSlice val = CreateHashValue(id);
+ OwningSlice key = EncodeHashKey(hash);
+ OwningSlice val = EncodeHashValue(id);
if (!db_->Put(leveldb::WriteOptions(), key.slice, val.slice).ok()) {
ESP_LOGE(kTag, "failed to entomb #%llx (id #%lx)", hash, id);
}
}
auto Database::dbPutTrackData(const TrackData& s) -> void {
- OwningSlice key = CreateDataKey(s.id());
- OwningSlice val = CreateDataValue(s);
+ OwningSlice key = EncodeDataKey(s.id());
+ OwningSlice val = EncodeDataValue(s);
if (!db_->Put(leveldb::WriteOptions(), key.slice, val.slice).ok()) {
ESP_LOGE(kTag, "failed to write data for #%lx", s.id());
}
}
auto Database::dbGetTrackData(TrackId id) -> std::optional<TrackData> {
- OwningSlice key = CreateDataKey(id);
+ OwningSlice key = EncodeDataKey(id);
std::string raw_val;
if (!db_->Get(leveldb::ReadOptions(), key.slice, &raw_val).ok()) {
ESP_LOGW(kTag, "no key found for #%lx", id);
@@ -326,15 +385,15 @@ auto Database::dbGetTrackData(TrackId id) -> std::optional<TrackData> {
}
auto Database::dbPutHash(const uint64_t& hash, TrackId i) -> void {
- OwningSlice key = CreateHashKey(hash);
- OwningSlice val = CreateHashValue(i);
+ OwningSlice key = EncodeHashKey(hash);
+ OwningSlice val = EncodeHashValue(i);
if (!db_->Put(leveldb::WriteOptions(), key.slice, val.slice).ok()) {
ESP_LOGE(kTag, "failed to write hash for #%lx", i);
}
}
auto Database::dbGetHash(const uint64_t& hash) -> std::optional<TrackId> {
- OwningSlice key = CreateHashKey(hash);
+ OwningSlice key = EncodeHashKey(hash);
std::string raw_val;
if (!db_->Get(leveldb::ReadOptions(), key.slice, &raw_val).ok()) {
ESP_LOGW(kTag, "no key found for hash #%llx", hash);
@@ -343,11 +402,13 @@ auto Database::dbGetHash(const uint64_t& hash) -> std::optional<TrackId> {
return ParseHashValue(raw_val);
}
-auto Database::dbPutTrack(TrackId id,
- const std::string& path,
- const uint64_t& hash) -> void {
- dbPutTrackData(TrackData(id, path, hash));
- dbPutHash(hash, id);
+auto Database::dbCreateIndexesForTrack(Track track) -> void {
+ for (const IndexInfo& index : GetIndexes()) {
+ leveldb::WriteBatch writes;
+ if (Index(index, track, &writes)) {
+ db_->Write(leveldb::WriteOptions(), &writes);
+ }
+ }
}
template <typename T>
@@ -475,6 +536,31 @@ template auto Database::dbGetPage<std::string>(
const Continuation<std::string>& c) -> Result<std::string>*;
template <>
+auto Database::ParseRecord<IndexRecord>(const leveldb::Slice& key,
+ const leveldb::Slice& val)
+ -> std::optional<IndexRecord> {
+ std::optional<IndexKey> data = ParseIndexKey(key);
+ if (!data) {
+ return {};
+ }
+
+ // If there was a track id included for this key, then this is a leaf record.
+ // Fetch the actual track data instead of relying on the information in the
+ // key.
+ std::optional<Track> track;
+ if (data->track) {
+ std::optional<TrackData> track_data = dbGetTrackData(*data->track);
+ TrackTags track_tags;
+ if (track_data &&
+ tag_parser_->ReadAndParseTags(track_data->filepath(), &track_tags)) {
+ track.emplace(*track_data, track_tags);
+ }
+ }
+
+ return IndexRecord(*data, track);
+}
+
+template <>
auto Database::ParseRecord<Track>(const leveldb::Slice& key,
const leveldb::Slice& val)
-> std::optional<Track> {
@@ -510,13 +596,46 @@ auto Database::ParseRecord<std::string>(const leveldb::Slice& key,
}
}
}
- stream << "\tval: 0x";
- std::string str = val.ToString();
- for (int i = 0; i < val.size(); i++) {
- stream << std::hex << std::setfill('0') << std::setw(2)
- << static_cast<int>(str[i]);
+ if (!val.empty()) {
+ stream << "\tval: 0x";
+ std::string str = val.ToString();
+ for (int i = 0; i < val.size(); i++) {
+ stream << std::hex << std::setfill('0') << std::setw(2)
+ << static_cast<int>(str[i]);
+ }
}
return stream.str();
}
+IndexRecord::IndexRecord(const IndexKey& key, std::optional<Track> track)
+ : key_(key), track_(track) {}
+
+auto IndexRecord::text() const -> std::optional<shared_string> {
+ if (track_) {
+ return track_->TitleOrFilename();
+ }
+ return key_.item;
+}
+
+auto IndexRecord::track() const -> std::optional<Track> {
+ return track_;
+}
+
+auto IndexRecord::Expand(std::size_t page_size) const
+ -> std::optional<Continuation<IndexRecord>> {
+ if (track_) {
+ return {};
+ }
+ IndexKey::Header new_header = ExpandHeader(key_.header, key_.item);
+ OwningSlice new_prefix = EncodeIndexPrefix(new_header);
+ return Continuation<IndexRecord>{
+ .iterator = nullptr,
+ .prefix = new_prefix.data,
+ .start_key = new_prefix.data,
+ .forward = true,
+ .was_prev_forward = true,
+ .page_size = page_size,
+ };
+}
+
} // namespace database
diff --git a/src/database/include/database.hpp b/src/database/include/database.hpp
index 8fecc5f6..77a17b75 100644
--- a/src/database/include/database.hpp
+++ b/src/database/include/database.hpp
@@ -16,6 +16,7 @@
#include <vector>
#include "file_gatherer.hpp"
+#include "index.hpp"
#include "leveldb/cache.h"
#include "leveldb/db.h"
#include "leveldb/iterator.h"
@@ -23,6 +24,7 @@
#include "leveldb/slice.h"
#include "records.hpp"
#include "result.hpp"
+#include "shared_string.h"
#include "tag_parser.hpp"
#include "tasks.hpp"
#include "track.hpp"
@@ -66,6 +68,20 @@ class Result {
std::optional<Continuation<T>> prev_page_;
};
+class IndexRecord {
+ public:
+ explicit IndexRecord(const IndexKey&, std::optional<Track>);
+
+ auto text() const -> std::optional<shared_string>;
+ auto track() const -> std::optional<Track>;
+
+ auto Expand(std::size_t) const -> std::optional<Continuation<IndexRecord>>;
+
+ private:
+ IndexKey key_;
+ std::optional<Track> track_;
+};
+
class Database {
public:
enum DatabaseError {
@@ -84,6 +100,9 @@ class Database {
auto GetTrackPath(TrackId id) -> std::future<std::optional<std::string>>;
+ auto GetIndexes() -> std::vector<IndexInfo>;
+ auto GetTracksByIndex(const IndexInfo& index, std::size_t page_size)
+ -> std::future<Result<IndexRecord>*>;
auto GetTracks(std::size_t page_size) -> std::future<Result<Track>*>;
auto GetDump(std::size_t page_size) -> std::future<Result<std::string>*>;
@@ -118,8 +137,7 @@ class Database {
auto dbGetTrackData(TrackId id) -> std::optional<TrackData>;
auto dbPutHash(const uint64_t& hash, TrackId i) -> void;
auto dbGetHash(const uint64_t& hash) -> std::optional<TrackId>;
- auto dbPutTrack(TrackId id, const std::string& path, const uint64_t& hash)
- -> void;
+ auto dbCreateIndexesForTrack(Track track) -> void;
template <typename T>
auto dbGetPage(const Continuation<T>& c) -> Result<T>*;
@@ -130,6 +148,10 @@ class Database {
};
template <>
+auto Database::ParseRecord<IndexRecord>(const leveldb::Slice& key,
+ const leveldb::Slice& val)
+ -> std::optional<IndexRecord>;
+template <>
auto Database::ParseRecord<Track>(const leveldb::Slice& key,
const leveldb::Slice& val)
-> std::optional<Track>;
diff --git a/src/database/include/index.hpp b/src/database/include/index.hpp
new file mode 100644
index 00000000..17229164
--- /dev/null
+++ b/src/database/include/index.hpp
@@ -0,0 +1,72 @@
+/*
+ * Copyright 2023 jacqueline <me@jacqueline.id.au>
+ *
+ * SPDX-License-Identifier: GPL-3.0-only
+ */
+
+#pragma once
+
+#include <stdint.h>
+
+#include <cstdint>
+#include <string>
+#include <variant>
+#include <vector>
+
+#include "leveldb/db.h"
+#include "leveldb/slice.h"
+
+#include "leveldb/write_batch.h"
+#include "shared_string.h"
+#include "track.hpp"
+
+namespace database {
+
+typedef uint8_t IndexId;
+
+struct IndexInfo {
+ // Unique id for this index
+ IndexId id;
+ // Localised, user-friendly description of this index. e.g. "Albums by Artist"
+ // or "All Tracks".
+ std::string name;
+ // Specifier for how this index breaks down the database.
+ std::vector<Tag> components;
+};
+
+struct IndexKey {
+ struct Header {
+ // The index that this key was created for.
+ IndexId id;
+ // The number of components of IndexInfo that have already been filtered.
+ // For example, if an index consists of { kGenre, kArtist }, and this key
+ // represents an artist, then depth = 1.
+ std::uint8_t depth;
+ // The cumulative hash of all filtered components, in order. For example, if
+ // an index consists of { kArtist, kAlbum, kTitle }, and we are at depth = 2
+ // then this may contain hash(hash("Jacqueline"), "My Cool Album").
+ std::uint64_t components_hash;
+ };
+ Header header;
+
+ // The filterable / selectable item that this key represents. "Jacqueline" for
+ // kArtist, "My Cool Album" for kAlbum, etc.
+ std::optional<std::string> item;
+ // If this is a leaf component, the track id for this record.
+ // This could reasonably be the value for a record, but we keep it as a part
+ // of the key to help with disambiguation.
+ std::optional<TrackId> track;
+};
+
+auto Index(const IndexInfo&, const Track&, leveldb::WriteBatch*) -> bool;
+auto ExpandHeader(const IndexKey::Header&, const std::optional<std::string>&)
+ -> IndexKey::Header;
+
+// Predefined indexes
+// TODO(jacqueline): Make these defined at runtime! :)
+
+extern const IndexInfo kAlbumsByArtist;
+extern const IndexInfo kTracksByGenre;
+extern const IndexInfo kAllTracks;
+
+} // namespace database
diff --git a/src/database/include/records.hpp b/src/database/include/records.hpp
index 95a1a1e8..58f29b20 100644
--- a/src/database/include/records.hpp
+++ b/src/database/include/records.hpp
@@ -9,10 +9,14 @@
#include <stdint.h>
#include <string>
+#include <variant>
+#include <vector>
#include "leveldb/db.h"
#include "leveldb/slice.h"
+#include "index.hpp"
+#include "shared_string.h"
#include "track.hpp"
namespace database {
@@ -34,39 +38,49 @@ class OwningSlice {
* Returns the prefix added to every TrackData key. This can be used to iterate
* over every data record in the database.
*/
-auto CreateDataPrefix() -> OwningSlice;
+auto EncodeDataPrefix() -> OwningSlice;
-/* Creates a data key for a track with the specified id. */
-auto CreateDataKey(const TrackId& id) -> OwningSlice;
+/* Encodes a data key for a track with the specified id. */
+auto EncodeDataKey(const TrackId& id) -> OwningSlice;
/*
* Encodes a TrackData instance into bytes, in preparation for storing it within
* the database. This encoding is consistent, and will remain stable over time.
*/
-auto CreateDataValue(const TrackData& track) -> OwningSlice;
+auto EncodeDataValue(const TrackData& track) -> OwningSlice;
/*
- * Parses bytes previously encoded via CreateDataValue back into a TrackData.
+ * Parses bytes previously encoded via EncodeDataValue back into a TrackData.
* May return nullopt if parsing fails.
*/
auto ParseDataValue(const leveldb::Slice& slice) -> std::optional<TrackData>;
-/* Creates a hash key for the specified hash. */
-auto CreateHashKey(const uint64_t& hash) -> OwningSlice;
+/* Encodes a hash key for the specified hash. */
+auto EncodeHashKey(const uint64_t& hash) -> OwningSlice;
/*
* Encodes a hash value (at this point just a track id) into bytes, in
* preparation for storing within the database. This encoding is consistent, and
* will remain stable over time.
*/
-auto CreateHashValue(TrackId id) -> OwningSlice;
+auto EncodeHashValue(TrackId id) -> OwningSlice;
/*
- * Parses bytes previously encoded via CreateHashValue back into a track id. May
+ * Parses bytes previously encoded via EncodeHashValue back into a track id. May
* return nullopt if parsing fails.
*/
auto ParseHashValue(const leveldb::Slice&) -> std::optional<TrackId>;
+/* Encodes a prefix that matches all index keys, of all ids and depths. */
+auto EncodeAllIndexesPrefix() -> OwningSlice;
+
+/*
+ */
+auto EncodeIndexPrefix(const IndexKey::Header&) -> OwningSlice;
+
+auto EncodeIndexKey(const IndexKey&) -> OwningSlice;
+auto ParseIndexKey(const leveldb::Slice&) -> std::optional<IndexKey>;
+
/* Encodes a TrackId as bytes. */
auto TrackIdToBytes(TrackId id) -> OwningSlice;
diff --git a/src/database/include/track.hpp b/src/database/include/track.hpp
index 5a0c0ca8..e3f94db4 100644
--- a/src/database/include/track.hpp
+++ b/src/database/include/track.hpp
@@ -8,11 +8,14 @@
#include <stdint.h>
+#include <map>
+#include <memory>
#include <optional>
#include <string>
#include <utility>
#include "leveldb/db.h"
+#include "shared_string.h"
#include "span.hpp"
namespace database {
@@ -41,25 +44,33 @@ enum class Encoding {
kFlac = 4,
};
+enum class Tag {
+ kTitle = 0,
+ kArtist = 1,
+ kAlbum = 2,
+ kAlbumTrack = 3,
+ kGenre = 4,
+};
+
/*
* Owning container for tag-related track metadata that was extracted from a
* file.
*/
-struct TrackTags {
- Encoding encoding;
- std::optional<std::string> title;
-
- // TODO(jacqueline): It would be nice to use shared_ptr's for the artist and
- // album, since there's likely a fair number of duplicates for each
- // (especially the former).
+class TrackTags {
+ public:
+ auto encoding() const -> Encoding { return encoding_; };
+ auto encoding(Encoding e) -> void { encoding_ = e; };
- std::optional<std::string> artist;
- std::optional<std::string> album;
+ TrackTags() : encoding_(Encoding::kUnsupported) {}
std::optional<int> channels;
std::optional<int> sample_rate;
std::optional<int> bits_per_sample;
+ auto set(const Tag& key, const std::string& val) -> void;
+ auto at(const Tag& key) const -> std::optional<shared_string>;
+ auto operator[](const Tag& key) const -> std::optional<shared_string>;
+
/*
* Returns a hash of the 'identifying' tags of this track. That is, a hash
* that can be used to determine if one track is likely the same as another,
@@ -69,6 +80,12 @@ struct TrackTags {
auto Hash() const -> uint64_t;
bool operator==(const TrackTags&) const = default;
+ TrackTags& operator=(const TrackTags&) = default;
+ TrackTags(const TrackTags&) = default;
+
+ private:
+ Encoding encoding_;
+ std::map<Tag, shared_string> tags_;
};
/*
@@ -156,6 +173,8 @@ class Track {
auto data() const -> const TrackData& { return data_; }
auto tags() const -> const TrackTags& { return tags_; }
+ auto TitleOrFilename() const -> shared_string;
+
bool operator==(const Track&) const = default;
Track operator=(const Track& other) const { return Track(other); }
diff --git a/src/database/index.cpp b/src/database/index.cpp
new file mode 100644
index 00000000..a828578d
--- /dev/null
+++ b/src/database/index.cpp
@@ -0,0 +1,88 @@
+/*
+ * Copyright 2023 jacqueline <me@jacqueline.id.au>
+ *
+ * SPDX-License-Identifier: GPL-3.0-only
+ */
+
+#include "index.hpp"
+#include <stdint.h>
+#include <variant>
+#include "komihash.h"
+#include "leveldb/write_batch.h"
+#include "records.hpp"
+
+namespace database {
+
+const IndexInfo kAlbumsByArtist{
+ .id = 1,
+ .name = "Albums by Artist",
+ .components = {Tag::kArtist, Tag::kAlbum, Tag::kAlbumTrack},
+};
+
+const IndexInfo kTracksByGenre{
+ .id = 2,
+ .name = "Tracks by Genre",
+ .components = {Tag::kGenre, Tag::kTitle},
+};
+
+const IndexInfo kAllTracks{
+ .id = 3,
+ .name = "All Tracks",
+ .components = {Tag::kTitle},
+};
+
+auto Index(const IndexInfo& info, const Track& t, leveldb::WriteBatch* batch)
+ -> bool {
+ IndexKey key{
+ .header{
+ .id = info.id,
+ .depth = 0,
+ .components_hash = 0,
+ },
+ .item = {},
+ .track = {},
+ };
+
+ for (std::uint8_t i = 0; i < info.components.size(); i++) {
+ // Fill in the text for this depth.
+ auto text = t.tags().at(info.components.at(i));
+ if (text) {
+ key.item = *text;
+ } else {
+ key.item = {};
+ }
+
+ // If this is the last component, then we should also fill in the track id.
+ if (i == info.components.size() - 1) {
+ key.track = t.data().id();
+ } else {
+ key.track = {};
+ }
+
+ auto encoded = EncodeIndexKey(key);
+ batch->Put(encoded.slice, leveldb::Slice{});
+
+ // If there are more components after this, then we need to finish by
+ // narrowing the header with the current title.
+ if (i < info.components.size() - 1) {
+ key.header = ExpandHeader(key.header, key.item);
+ }
+ }
+ return true;
+}
+
+auto ExpandHeader(const IndexKey::Header& header,
+ const std::optional<std::string>& component)
+ -> IndexKey::Header {
+ IndexKey::Header ret{header};
+ ret.depth++;
+ if (component) {
+ ret.components_hash =
+ komihash(component->data(), component->size(), ret.components_hash);
+ } else {
+ ret.components_hash = komihash(NULL, 0, ret.components_hash);
+ }
+ return ret;
+}
+
+} // namespace database
diff --git a/src/database/records.cpp b/src/database/records.cpp
index 49e5db0b..72608eb0 100644
--- a/src/database/records.cpp
+++ b/src/database/records.cpp
@@ -8,20 +8,43 @@
#include <stdint.h>
+#include <iomanip>
#include <sstream>
+#include <string>
#include <vector>
#include "cbor.h"
#include "esp_log.h"
+#include "index.hpp"
+#include "komihash.h"
+#include "shared_string.h"
#include "track.hpp"
+// As LevelDB is a key-value store, each record in the database consists of a
+// key and an optional value.
+//
+// Values, when present, are always cbor-encoded. This is fast, compact, and
+// very easy to evolve over time due to its inclusion of type information.
+//
+// Keys have a more complicated scheme, as for performance we rely heavily on
+// LevelDB's sorted storage format. We must therefore worry about clustering of
+// similar records, and the sortability of our encoding format.
+// Each kind of key consists of a a single-byte prefix, then one or more
+// fields separated by null (0) bytes. Each field may be cbor-encoded, or may
+// use some bespoke encoding; it depends on whether we want to be able to sort
+// by that field.
+// For debugging and discussion purposes, we represent field separators
+// textually as '/', and write each field as its hex encoding. e.g. a data key
+// for the track with id 17 would be written as 'D / 0x11'.
+
namespace database {
static const char* kTag = "RECORDS";
static const char kDataPrefix = 'D';
static const char kHashPrefix = 'H';
+static const char kIndexPrefix = 'I';
static const char kFieldSeparator = '\0';
/*
@@ -39,6 +62,8 @@ static const char kFieldSeparator = '\0';
template <typename T>
auto cbor_encode(uint8_t** out_buf, T fn) -> std::size_t {
// First pass: work out how many bytes we will encode into.
+ // FIXME: With benchmarking to help, we could consider preallocting a small
+ // buffer here to do the whole encoding in one pass.
CborEncoder size_encoder;
cbor_encoder_init(&size_encoder, NULL, 0, 0);
std::invoke(fn, &size_encoder);
@@ -55,19 +80,21 @@ auto cbor_encode(uint8_t** out_buf, T fn) -> std::size_t {
OwningSlice::OwningSlice(std::string d) : data(d), slice(data) {}
-auto CreateDataPrefix() -> OwningSlice {
+/* 'D/' */
+auto EncodeDataPrefix() -> OwningSlice {
char data[2] = {kDataPrefix, kFieldSeparator};
return OwningSlice({data, 2});
}
-auto CreateDataKey(const TrackId& id) -> OwningSlice {
+/* 'D/ 0xACAB' */
+auto EncodeDataKey(const TrackId& id) -> OwningSlice {
std::ostringstream output;
output.put(kDataPrefix).put(kFieldSeparator);
output << TrackIdToBytes(id).data;
return OwningSlice(output.str());
}
-auto CreateDataValue(const TrackData& track) -> OwningSlice {
+auto EncodeDataValue(const TrackData& track) -> OwningSlice {
uint8_t* buf;
std::size_t buf_len = cbor_encode(&buf, [&](CborEncoder* enc) {
CborEncoder array_encoder;
@@ -179,7 +206,8 @@ auto ParseDataValue(const leveldb::Slice& slice) -> std::optional<TrackData> {
return TrackData(id, path, hash, play_count, is_tombstoned);
}
-auto CreateHashKey(const uint64_t& hash) -> OwningSlice {
+/* 'H/ 0xBEEF' */
+auto EncodeHashKey(const uint64_t& hash) -> OwningSlice {
std::ostringstream output;
output.put(kHashPrefix).put(kFieldSeparator);
@@ -197,10 +225,183 @@ auto ParseHashValue(const leveldb::Slice& slice) -> std::optional<TrackId> {
return BytesToTrackId(slice.ToString());
}
-auto CreateHashValue(TrackId id) -> OwningSlice {
+auto EncodeHashValue(TrackId id) -> OwningSlice {
return TrackIdToBytes(id);
}
+/* 'I/' */
+auto EncodeAllIndexesPrefix() -> OwningSlice {
+ char data[2] = {kIndexPrefix, kFieldSeparator};
+ return OwningSlice({data, 2});
+}
+
+auto AppendIndexHeader(const IndexKey::Header& header, std::ostringstream* out)
+ -> void {
+ *out << kIndexPrefix << kFieldSeparator;
+
+ // Construct the header.
+ uint8_t* buf;
+ std::size_t buf_len = cbor_encode(&buf, [&](CborEncoder* enc) {
+ CborEncoder array_encoder;
+ CborError err;
+ err = cbor_encoder_create_array(enc, &array_encoder, 3);
+ if (err != CborNoError && err != CborErrorOutOfMemory) {
+ ESP_LOGE(kTag, "encoding err %u", err);
+ return;
+ }
+ err = cbor_encode_uint(&array_encoder, header.id);
+ if (err != CborNoError && err != CborErrorOutOfMemory) {
+ ESP_LOGE(kTag, "encoding err %u", err);
+ return;
+ }
+ err = cbor_encode_uint(&array_encoder, header.depth);
+ if (err != CborNoError && err != CborErrorOutOfMemory) {
+ ESP_LOGE(kTag, "encoding err %u", err);
+ return;
+ }
+ err = cbor_encode_uint(&array_encoder, header.components_hash);
+ if (err != CborNoError && err != CborErrorOutOfMemory) {
+ ESP_LOGE(kTag, "encoding err %u", err);
+ return;
+ }
+ err = cbor_encoder_close_container(enc, &array_encoder);
+ if (err != CborNoError && err != CborErrorOutOfMemory) {
+ ESP_LOGE(kTag, "encoding err %u", err);
+ return;
+ }
+ });
+ std::string encoded{reinterpret_cast<char*>(buf), buf_len};
+ delete buf;
+ *out << encoded << kFieldSeparator;
+}
+
+auto EncodeIndexPrefix(const IndexKey::Header& header) -> OwningSlice {
+ std::ostringstream out;
+ AppendIndexHeader(header, &out);
+ return OwningSlice(out.str());
+}
+
+/*
+ * 'I/0xa2/0x686921/0xb9'
+ * ^ --- trailer
+ * ^ --- component ("hi!")
+ * ^ -------- header
+ *
+ * The components *must* be encoded in a way that is easy to sort
+ * lexicographically. The header and footer do not have this restriction, so
+ * cbor is fine.
+ *
+ * We store grouping information within the header; which index, filtered
+ * components. We store disambiguation information in the trailer; just a track
+ * id for now, but could reasonably be something like 'release year' as well.
+ */
+auto EncodeIndexKey(const IndexKey& key) -> OwningSlice {
+ std::ostringstream out;
+
+ // Construct the header.
+ AppendIndexHeader(key.header, &out);
+
+ // The component should already be UTF-8 encoded, so just write it.
+ if (key.item) {
+ out << *key.item;
+ }
+
+ // Construct the footer.
+ out << kFieldSeparator;
+ if (key.track) {
+ out << TrackIdToBytes(*key.track).data;
+ }
+ return OwningSlice(out.str());
+}
+
+auto ParseIndexKey(const leveldb::Slice& slice) -> std::optional<IndexKey> {
+ IndexKey result{};
+
+ auto prefix = EncodeAllIndexesPrefix();
+ if (!slice.starts_with(prefix.data)) {
+ return {};
+ }
+
+ std::string key_data = slice.ToString().substr(prefix.data.size());
+ std::size_t header_length = 0;
+ {
+ CborParser parser;
+ CborValue container;
+ CborError err;
+ err = cbor_parser_init(reinterpret_cast<const uint8_t*>(key_data.data()),
+ key_data.size(), 0, &parser, &container);
+ if (err != CborNoError || !cbor_value_is_container(&container)) {
+ return {};
+ }
+
+ CborValue val;
+ err = cbor_value_enter_container(&container, &val);
+ if (err != CborNoError || !cbor_value_is_unsigned_integer(&val)) {
+ return {};
+ }
+
+ uint64_t raw_int;
+ err = cbor_value_get_uint64(&val, &raw_int);
+ if (err != CborNoError) {
+ return {};
+ }
+ result.header.id = raw_int;
+ err = cbor_value_advance(&val);
+ if (err != CborNoError || !cbor_value_is_unsigned_integer(&val)) {
+ return {};
+ }
+
+ err = cbor_value_get_uint64(&val, &raw_int);
+ if (err != CborNoError) {
+ return {};
+ }
+ result.header.depth = raw_int;
+ err = cbor_value_advance(&val);
+ if (err != CborNoError || !cbor_value_is_unsigned_integer(&val)) {
+ return {};
+ }
+
+ err = cbor_value_get_uint64(&val, &raw_int);
+ if (err != CborNoError) {
+ return {};
+ }
+ result.header.components_hash = raw_int;
+ err = cbor_value_advance(&val);
+ if (err != CborNoError || !cbor_value_at_end(&val)) {
+ return {};
+ }
+
+ const uint8_t* next_byte = cbor_value_get_next_byte(&val);
+ header_length =
+ next_byte - reinterpret_cast<const uint8_t*>(key_data.data());
+ }
+
+ if (header_length == 0) {
+ return {};
+ }
+
+ if (header_length >= key_data.size()) {
+ return {};
+ }
+
+ std::istringstream in(key_data.substr(header_length + 1));
+ std::stringbuf buffer{};
+
+ in.get(buffer, kFieldSeparator);
+ if (buffer.str().size() > 0) {
+ result.item = buffer.str();
+ }
+
+ buffer = {};
+ in.get(buffer);
+ if (buffer.str().size() > 1) {
+ std::string raw_id = buffer.str().substr(1);
+ result.track = BytesToTrackId(raw_id);
+ }
+
+ return result;
+}
+
auto TrackIdToBytes(TrackId id) -> OwningSlice {
uint8_t buf[8];
CborEncoder enc;
diff --git a/src/database/tag_parser.cpp b/src/database/tag_parser.cpp
index 83b0a796..49febe27 100644
--- a/src/database/tag_parser.cpp
+++ b/src/database/tag_parser.cpp
@@ -12,6 +12,23 @@
namespace database {
+auto convert_tag(int tag) -> std::optional<Tag> {  // libtags tag id -> our Tag enum
+  switch (tag) {
+    case Ttitle:
+      return Tag::kTitle;
+    case Tartist:
+      return Tag::kArtist;
+    case Talbum:
+      return Tag::kAlbum;
+    case Ttrack:
+      return Tag::kAlbumTrack;
+    case Tgenre:
+      return Tag::kGenre;
+    default:
+      return {};  // unmapped ids yield empty; the tag() callback then skips them
+  }
+}
+
namespace libtags {
struct Aux {
@@ -55,12 +72,9 @@ static void tag(Tagctx* ctx,
int size,
Tagread f) {
Aux* aux = reinterpret_cast<Aux*>(ctx->aux);
- if (t == Ttitle) {
- aux->tags->title = v;
- } else if (t == Tartist) {
- aux->tags->artist = v;
- } else if (t == Talbum) {
- aux->tags->album = v;
+ auto tag = convert_tag(t);
+ if (tag) {
+ aux->tags->set(*tag, v);
}
}
@@ -108,19 +122,19 @@ auto TagParserImpl::ReadAndParseTags(const std::string& path, TrackTags* out)
switch (ctx.format) {
case Fmp3:
- out->encoding = Encoding::kMp3;
+ out->encoding(Encoding::kMp3);
break;
case Fogg:
- out->encoding = Encoding::kOgg;
+ out->encoding(Encoding::kOgg);
break;
case Fflac:
- out->encoding = Encoding::kFlac;
+ out->encoding(Encoding::kFlac);
break;
case Fwav:
- out->encoding = Encoding::kWav;
+ out->encoding(Encoding::kWav);
break;
default:
- out->encoding = Encoding::kUnsupported;
+ out->encoding(Encoding::kUnsupported);
}
if (ctx.channels > 0) {
diff --git a/src/database/track.cpp b/src/database/track.cpp
index 00acc1f6..dc33701d 100644
--- a/src/database/track.cpp
+++ b/src/database/track.cpp
@@ -7,11 +7,28 @@
#include "track.hpp"
#include <komihash.h>
+#include "shared_string.h"
namespace database {
+auto TrackTags::set(const Tag& key, const std::string& val) -> void {  // store one tag value
+  tags_[key] = val;  // inserts, or overwrites any existing value for this tag
+}
+
+auto TrackTags::at(const Tag& key) const -> std::optional<shared_string> {  // value for tag, if set
+  if (auto it = tags_.find(key); it != tags_.end()) {  // one lookup, not contains()+at()
+    return it->second;
+  }
+  return {};  // tag was never set for this track
+}
+
+auto TrackTags::operator[](const Tag& key) const
+    -> std::optional<shared_string> {
+  return at(key);  // read-only alias for at(); const, never inserts a default
+}
+
/* Helper function to update a komihash stream with a std::string. */
-auto HashString(komihash_stream_t* stream, std::string str) -> void {
+auto HashString(komihash_stream_t* stream, const std::string& str) -> void {  // const ref: no per-call copy
  komihash_stream_update(stream, str.c_str(), str.length());
}
@@ -24,9 +41,11 @@ auto TrackTags::Hash() const -> uint64_t {
// tags at all.
komihash_stream_t stream;
komihash_stream_init(&stream, 0);
- HashString(&stream, title.value_or(""));
- HashString(&stream, artist.value_or(""));
- HashString(&stream, album.value_or(""));
+
+ HashString(&stream, at(Tag::kTitle).value_or(""));
+ HashString(&stream, at(Tag::kArtist).value_or(""));
+ HashString(&stream, at(Tag::kAlbum).value_or(""));
+
return komihash_stream_final(&stream);
}
@@ -48,4 +67,16 @@ void swap(Track& first, Track& second) {
second = temp;
}
+auto Track::TitleOrFilename() const -> shared_string {
+  auto title = tags().at(Tag::kTitle);  // prefer the parsed title tag when present
+  if (title) {
+    return *title;
+  }
+  auto start = data().filepath().find_last_of('/');
+  if (start == std::string::npos) {
+    return data().filepath();  // no directory component: whole path is the name
+  }
+  return data().filepath().substr(start + 1);  // +1 skips the '/'; substr(start) kept it
+}
+
} // namespace database