Add some basic tests for the database

author: jacqueline <me@jacqueline.id.au> 2023-05-12 10:30:07 +1000
committer: jacqueline <me@jacqueline.id.au> 2023-05-12 10:30:07 +1000
commit: 961c8014ada037712e8c72f23430362e9f14c1ec (patch)
tree: ce13e0a00fc0d0318d46e6dfbecf2360b4cc5e14 /src/database
parent: 10eb120878e01579bff2fdfab7bef59639b21155 (diff)
download: tangara-fw-961c8014ada037712e8c72f23430362e9f14c1ec.tar.gz
15 files changed, 617 insertions, 211 deletions
diff --git a/src/database/CMakeLists.txt b/src/database/CMakeLists.txt
index d5748342..897bf029 100644
--- a/src/database/CMakeLists.txt
+++ b/src/database/CMakeLists.txt
@@ -1,5 +1,5 @@
 idf_component_register(
-  SRCS "env_esp.cpp" "database.cpp" "song.cpp" "db_task.cpp" "records.cpp"
+  SRCS "env_esp.cpp" "database.cpp" "song.cpp" "db_task.cpp" "records.cpp" "file_gatherer.cpp" "tag_parser.cpp"
   INCLUDE_DIRS "include"
   REQUIRES "result" "span" "esp_psram" "fatfs" "libtags" "komihash" "cbor")
 
diff --git a/src/database/database.cpp b/src/database/database.cpp
index 47ad8ca6..df10ebb9 100644
--- a/src/database/database.cpp
+++ b/src/database/database.cpp
@@ -1,5 +1,7 @@
 #include "database.hpp"
+
 #include <stdint.h>
+
 #include <cstdint>
 #include <functional>
 #include <iomanip>
@@ -8,28 +10,32 @@
 #include "esp_log.h"
 #include "ff.h"
 #include "leveldb/cache.h"
-
-#include "db_task.hpp"
-#include "env_esp.hpp"
-#include "file_gatherer.hpp"
 #include "leveldb/db.h"
 #include "leveldb/iterator.h"
 #include "leveldb/options.h"
 #include "leveldb/slice.h"
 #include "leveldb/write_batch.h"
+
+#include "db_task.hpp"
+#include "env_esp.hpp"
+#include "file_gatherer.hpp"
 #include "records.hpp"
 #include "result.hpp"
 #include "song.hpp"
+#include "tag_parser.hpp"
 
 namespace database {
 
 static SingletonEnv<leveldb::EspEnv> sEnv;
 static const char* kTag = "DB";
 
-static const std::string kSongIdKey("next_song_id");
+static const char kSongIdKey[] = "next_song_id";
 
 static std::atomic<bool> sIsDbOpen(false);
 
+static FileGathererImpl sFileGatherer;
+static TagParserImpl sTagParser;
+
 template <typename Parser>
 auto IterateAndParse(leveldb::Iterator* it, std::size_t limit, Parser p)
     -> void {
@@ -44,6 +50,11 @@ auto IterateAndParse(leveldb::Iterator* it, std::size_t limit, Parser p)
 }
 
 auto Database::Open() -> cpp::result<Database*, DatabaseError> {
+  return Open(&sFileGatherer, &sTagParser);
+}
+
+auto Database::Open(IFileGatherer* gatherer, ITagParser* parser)
+    -> cpp::result<Database*, DatabaseError> {
   // TODO(jacqueline): Why isn't compare_and_exchange_* available?
   if (sIsDbOpen.exchange(true)) {
     return cpp::fail(DatabaseError::ALREADY_OPEN);
@@ -54,7 +65,7 @@ auto Database::Open() -> cpp::result<Database*, DatabaseError> {
   }
 
   return RunOnDbTask<cpp::result<Database*, DatabaseError>>(
-             []() -> cpp::result<Database*, DatabaseError> {
+             [=]() -> cpp::result<Database*, DatabaseError> {
                leveldb::DB* db;
                leveldb::Cache* cache = leveldb::NewLRUCache(24 * 1024);
                leveldb::Options options;
@@ -74,15 +85,32 @@ auto Database::Open() -> cpp::result<Database*, DatabaseError> {
                }
 
                ESP_LOGI(kTag, "Database opened successfully");
-               return new Database(db, cache);
+               return new Database(db, cache, gatherer, parser);
              })
       .get();
 }
 
-Database::Database(leveldb::DB* db, leveldb::Cache* cache)
-    : db_(db), cache_(cache) {}
+auto Database::Destroy() -> void {
+  leveldb::Options options;
+  options.env = sEnv.env();
+  leveldb::DestroyDB("/.db", options);
+}
+
+Database::Database(leveldb::DB* db,
+                   leveldb::Cache* cache,
+                   IFileGatherer* file_gatherer,
+                   ITagParser* tag_parser)
+    : db_(db),
+      cache_(cache),
+      file_gatherer_(file_gatherer),
+      tag_parser_(tag_parser) {}
 
 Database::~Database() {
+  // Delete db_ first so that any outstanding background work finishes before
+  // the background task is killed.
+  delete db_;
+  delete cache_;
+
   QuitDbTask();
   sIsDbOpen.store(false);
 }
@@ -115,7 +143,8 @@ auto Database::Update() -> std::future<void> {
       }
 
       SongTags tags;
-      if (!ReadAndParseTags(song->filepath(), &tags)) {
+      if (!tag_parser_->ReadAndParseTags(song->filepath(), &tags) ||
+          tags.encoding == Encoding::kUnsupported) {
         // We couldn't read the tags for this song. Either they were
         // malformed, or perhaps the file is missing. Either way, tombstone
         // this record.
@@ -143,9 +172,10 @@ auto Database::Update() -> std::future<void> {
 
     // Stage 2: search for newly added files.
     ESP_LOGI(kTag, "scanning for new songs");
-    FindFiles("", [&](const std::string& path) {
+    file_gatherer_->FindFiles("", [&](const std::string& path) {
       SongTags tags;
-      if (!ReadAndParseTags(path, &tags)) {
+      if (!tag_parser_->ReadAndParseTags(path, &tags) ||
+          tags.encoding == Encoding::kUnsupported) {
         // No parseable tags; skip this fiile.
         return;
       }
@@ -186,29 +216,16 @@ auto Database::Update() -> std::future<void> {
   });
 }
 
-auto Database::Destroy() -> std::future<void> {
-  return RunOnDbTask<void>([&]() -> void {
-    const leveldb::Snapshot* snap = db_->GetSnapshot();
-    leveldb::ReadOptions options;
-    options.snapshot = snap;
-    leveldb::Iterator* it = db_->NewIterator(options);
-    it->SeekToFirst();
-    while (it->Valid()) {
-      db_->Delete(leveldb::WriteOptions(), it->key());
-      it->Next();
-    }
-    db_->ReleaseSnapshot(snap);
-  });
-}
-
 auto Database::dbMintNewSongId() -> SongId {
+  SongId next_id = 1;
   std::string val;
   auto status = db_->Get(leveldb::ReadOptions(), kSongIdKey, &val);
-  if (!status.ok()) {
-    // TODO(jacqueline): check the db is actually empty.
-    ESP_LOGW(kTag, "error getting next id: %s", status.ToString().c_str());
+  if (status.ok()) {
+    next_id = BytesToSongId(val).value_or(next_id);
+  } else if (!status.IsNotFound()) {
+    // TODO(jacqueline): Handle this more.
+    ESP_LOGE(kTag, "failed to get next song id");
   }
-  SongId next_id = BytesToSongId(val);
 
   if (!db_->Put(leveldb::WriteOptions(), kSongIdKey,
                 SongIdToBytes(next_id + 1).slice)
@@ -270,14 +287,15 @@ auto Database::dbPutSong(SongId id,
   dbPutHash(hash, id);
 }
 
-auto parse_song(const leveldb::Slice& key, const leveldb::Slice& value)
-    -> std::optional<Song> {
+auto parse_song(ITagParser* parser,
+                const leveldb::Slice& key,
+                const leveldb::Slice& value) -> std::optional<Song> {
   std::optional<SongData> data = ParseDataValue(value);
   if (!data) {
     return {};
   }
   SongTags tags;
-  if (!ReadAndParseTags(data->filepath(), &tags)) {
+  if (!parser->ReadAndParseTags(data->filepath(), &tags)) {
     return {};
   }
   return Song(*data, tags);
@@ -285,7 +303,8 @@ auto parse_song(const leveldb::Slice& key, const leveldb::Slice& value)
 
 auto Database::GetSongs(std::size_t page_size) -> std::future<Result<Song>> {
   return RunOnDbTask<Result<Song>>([=, this]() -> Result<Song> {
-    return Query<Song>(CreateDataPrefix().slice, page_size, &parse_song);
+    return Query<Song>(CreateDataPrefix().slice, page_size,
+                       std::bind_front(&parse_song, tag_parser_));
   });
 }
 
@@ -293,7 +312,8 @@ auto Database::GetMoreSongs(std::size_t page_size, Continuation c)
     -> std::future<Result<Song>> {
   leveldb::Iterator* it = c.release();
   return RunOnDbTask<Result<Song>>([=, this]() -> Result<Song> {
-    return Query<Song>(it, page_size, &parse_song);
+    return Query<Song>(it, page_size,
+                       std::bind_front(&parse_song, tag_parser_));
   });
 }
 
diff --git a/src/database/db_task.cpp b/src/database/db_task.cpp
index ce1cd98a..5b4b34b5 100644
--- a/src/database/db_task.cpp
+++ b/src/database/db_task.cpp
@@ -46,13 +46,13 @@ void DatabaseTaskMain(void* args) {
   while (true) {
     WorkItem item;
     if (xQueueReceive(sWorkQueue, &item, portMAX_DELAY)) {
-      if (item.quit) {
-        break;
-      }
       if (item.fn != nullptr) {
         std::invoke(*item.fn);
         delete item.fn;
       }
+      if (item.quit) {
+        break;
+      }
     }
   }
   vQueueDelete(sWorkQueue);
@@ -68,7 +68,7 @@ auto StartDbTask() -> bool {
     sDbStack = reinterpret_cast<StackType_t*>(
         heap_caps_malloc(kDbStackSize, MALLOC_CAP_SPIRAM));
   }
-  sWorkQueue = xQueueCreate(8, sizeof(std::function<void(void)>*));
+  sWorkQueue = xQueueCreate(8, sizeof(WorkItem));
   xTaskCreateStatic(&DatabaseTaskMain, "DB", kDbStackSize, NULL, 1, sDbStack,
                     &sDbStaticTask);
   return true;
diff --git a/src/database/file_gatherer.cpp b/src/database/file_gatherer.cpp
new file mode 100644
index 00000000..e3b89285
--- /dev/null
+++ b/src/database/file_gatherer.cpp
@@ -0,0 +1,60 @@
+#include "file_gatherer.hpp"
+
+#include <deque>
+#include <functional>
+#include <sstream>
+#include <string>
+
+#include "ff.h"
+
+namespace database {
+
+static_assert(sizeof(TCHAR) == sizeof(char), "TCHAR must be CHAR");
+
+auto FileGathererImpl::FindFiles(const std::string& root,
+                                 std::function<void(const std::string&)> cb)
+    -> void {
+  std::deque<std::string> to_explore;
+  to_explore.push_back(root);
+
+  while (!to_explore.empty()) {
+    std::string next_path_str = to_explore.front();
+    const TCHAR* next_path = static_cast<const TCHAR*>(next_path_str.c_str());
+
+    FF_DIR dir;
+    FRESULT res = f_opendir(&dir, next_path);
+    if (res != FR_OK) {
+      // TODO: log.
+      continue;
+    }
+
+    for (;;) {
+      FILINFO info;
+      res = f_readdir(&dir, &info);
+      if (res != FR_OK || info.fname[0] == 0) {
+        // No more files in the directory.
+        break;
+      } else if (info.fattrib & (AM_HID | AM_SYS) || info.fname[0] == '.') {
+        // System or hidden file. Ignore it and move on.
+        continue;
+      } else {
+        std::stringstream full_path;
+        full_path << next_path_str << "/" << info.fname;
+
+        if (info.fattrib & AM_DIR) {
+          // This is a directory. Add it to the explore queue.
+          to_explore.push_back(full_path.str());
+        } else {
+          // This is a file! Let the callback know about it.
+          // std::invoke(cb, full_path.str(), info);
+          std::invoke(cb, full_path.str());
+        }
+      }
+    }
+
+    f_closedir(&dir);
+    to_explore.pop_front();
+  }
+}
+
+}  // namespace database
diff --git a/src/database/include/database.hpp b/src/database/include/database.hpp
index 6cdaaca6..29872e8d 100644
--- a/src/database/include/database.hpp
+++ b/src/database/include/database.hpp
@@ -9,6 +9,7 @@
 #include <utility>
 #include <vector>
 
+#include "file_gatherer.hpp"
 #include "leveldb/cache.h"
 #include "leveldb/db.h"
 #include "leveldb/iterator.h"
@@ -17,6 +18,7 @@
 #include "records.hpp"
 #include "result.hpp"
 #include "song.hpp"
+#include "tag_parser.hpp"
 
 namespace database {
 
@@ -61,12 +63,15 @@ class Database {
     ALREADY_OPEN,
     FAILED_TO_OPEN,
   };
+  static auto Open(IFileGatherer* file_gatherer, ITagParser* tag_parser)
+      -> cpp::result<Database*, DatabaseError>;
   static auto Open() -> cpp::result<Database*, DatabaseError>;
 
+  static auto Destroy() -> void;
+
   ~Database();
 
   auto Update() -> std::future<void>;
-  auto Destroy() -> std::future<void>;
 
   auto GetSongs(std::size_t page_size) -> std::future<Result<Song>>;
   auto GetMoreSongs(std::size_t page_size, Continuation c)
@@ -80,10 +85,19 @@ class Database {
   Database& operator=(const Database&) = delete;
 
  private:
-  std::unique_ptr<leveldb::DB> db_;
-  std::unique_ptr<leveldb::Cache> cache_;
-
-  Database(leveldb::DB* db, leveldb::Cache* cache);
+  // Owned. Dumb pointers because destruction needs to be done in an explicit
+  // order.
+  leveldb::DB* db_;
+  leveldb::Cache* cache_;
+
+  // Not owned.
+  IFileGatherer* file_gatherer_;
+  ITagParser* tag_parser_;
+
+  Database(leveldb::DB* db,
+           leveldb::Cache* cache,
+           IFileGatherer* file_gatherer,
+           ITagParser* tag_parser);
 
   auto dbMintNewSongId() -> SongId;
   auto dbEntomb(SongId song, uint64_t hash) -> void;
diff --git a/src/database/include/file_gatherer.hpp b/src/database/include/file_gatherer.hpp
index 5df5a61b..bba4d0df 100644
--- a/src/database/include/file_gatherer.hpp
+++ b/src/database/include/file_gatherer.hpp
@@ -9,51 +9,20 @@
 
 namespace database {
 
-static_assert(sizeof(TCHAR) == sizeof(char), "TCHAR must be CHAR");
-
-template <typename Callback>
-auto FindFiles(const std::string& root, Callback cb) -> void {
-  std::deque<std::string> to_explore;
-  to_explore.push_back(root);
-
-  while (!to_explore.empty()) {
-    std::string next_path_str = to_explore.front();
-    const TCHAR* next_path = static_cast<const TCHAR*>(next_path_str.c_str());
-
-    FF_DIR dir;
-    FRESULT res = f_opendir(&dir, next_path);
-    if (res != FR_OK) {
-      // TODO: log.
-      continue;
-    }
-
-    for (;;) {
-      FILINFO info;
-      res = f_readdir(&dir, &info);
-      if (res != FR_OK || info.fname[0] == 0) {
-        // No more files in the directory.
-        break;
-      } else if (info.fattrib & (AM_HID | AM_SYS) || info.fname[0] == '.') {
-        // System or hidden file. Ignore it and move on.
-        continue;
-      } else {
-        std::stringstream full_path;
-        full_path << next_path_str << "/" << info.fname;
-
-        if (info.fattrib & AM_DIR) {
-          // This is a directory. Add it to the explore queue.
-          to_explore.push_back(full_path.str());
-        } else {
-          // This is a file! Let the callback know about it.
-          // std::invoke(cb, full_path.str(), info);
-          std::invoke(cb, full_path.str());
-        }
-      }
-    }
-
-    f_closedir(&dir);
-    to_explore.pop_front();
-  }
-}
+class IFileGatherer {
+ public:
+  virtual ~IFileGatherer(){};
+
+  virtual auto FindFiles(const std::string& root,
+                         std::function<void(const std::string&)> cb)
+      -> void = 0;
+};
+
+class FileGathererImpl : public IFileGatherer {
+ public:
+  virtual auto FindFiles(const std::string& root,
+                         std::function<void(const std::string&)> cb)
+      -> void override;
+};
 
 }  // namespace database
diff --git a/src/database/include/records.hpp b/src/database/include/records.hpp
index 22d2ca5b..371d8d58 100644
--- a/src/database/include/records.hpp
+++ b/src/database/include/records.hpp
@@ -1,14 +1,21 @@
 #pragma once
 
-#include <leveldb/db.h>
 #include <stdint.h>
+
 #include <string>
 
+#include "leveldb/db.h"
 #include "leveldb/slice.h"
+
 #include "song.hpp"
 
 namespace database {
 
+/*
+ * Helper class for creating leveldb Slices bundled with the data they point to.
+ * Slices are otherwise non-owning, which can make handling them post-creation
+ * difficult.
+ */
 class OwningSlice {
  public:
   std::string data;
@@ -17,16 +24,50 @@ class OwningSlice {
   explicit OwningSlice(std::string d);
 };
 
+/*
+ * Returns the prefix added to every SongData key. This can be used to iterate
+ * over every data record in the database.
+ */
 auto CreateDataPrefix() -> OwningSlice;
+
+/* Creates a data key for a song with the specified id. */
 auto CreateDataKey(const SongId& id) -> OwningSlice;
+
+/*
+ * Encodes a SongData instance into bytes, in preparation for storing it within
+ * the database. This encoding is consistent, and will remain stable over time.
+ */
 auto CreateDataValue(const SongData& song) -> OwningSlice;
+
+/*
+ * Parses bytes previously encoded via CreateDataValue back into a SongData. May
+ * return nullopt if parsing fails.
+ */
 auto ParseDataValue(const leveldb::Slice& slice) -> std::optional<SongData>;
 
+/* Creates a hash key for the specified hash. */
 auto CreateHashKey(const uint64_t& hash) -> OwningSlice;
-auto ParseHashValue(const leveldb::Slice&) -> std::optional<SongId>;
+
+/*
+ * Encodes a hash value (at this point just a song id) into bytes, in
+ * preparation for storing within the database. This encoding is consistent, and
+ * will remain stable over time.
+ */
 auto CreateHashValue(SongId id) -> OwningSlice;
 
+/*
+ * Parses bytes previously encoded via CreateHashValue back into a song id. May
+ * return nullopt if parsing fails.
+ */
+auto ParseHashValue(const leveldb::Slice&) -> std::optional<SongId>;
+
+/* Encodes a SongId as bytes. */
 auto SongIdToBytes(SongId id) -> OwningSlice;
-auto BytesToSongId(const std::string& bytes) -> SongId;
+
+/*
+ * Converts a song id encoded via SongIdToBytes back into a SongId. May return
+ * nullopt if parsing fails.
+ */
+auto BytesToSongId(const std::string& bytes) -> std::optional<SongId>;
 
 }  // namespace database
diff --git a/src/database/include/song.hpp b/src/database/include/song.hpp
index 12a7ef0c..e51e5587 100644
--- a/src/database/include/song.hpp
+++ b/src/database/include/song.hpp
@@ -1,7 +1,7 @@
 #pragma once
 
 #include <stdint.h>
-#include <cstdint>
+
 #include <optional>
 #include <string>
 
@@ -10,20 +10,68 @@
 
 namespace database {
 
+/*
+ * Uniquely describes a single song within the database. This value will be
+ * consistent across database updates, and should ideally (but is not guaranteed
+ * to) endure even across a song being removed and re-added.
+ *
+ * Four billion songs should be enough for anybody.
+ */
 typedef uint32_t SongId;
 
-enum Encoding { ENC_UNSUPPORTED, ENC_MP3 };
+/*
+ * Audio file encodings that we are aware of. Used to select an appropriate
+ * decoder at play time.
+ *
+ * Values of this enum are persisted in this database, so it is probably never a
+ * good idea to change the int representation of an existing value.
+ */
+enum class Encoding {
+  kUnsupported = 0,
+  kMp3 = 1,
+};
 
+/*
+ * Owning container for tag-related song metadata that was extracted from a
+ * file.
+ */
 struct SongTags {
   Encoding encoding;
   std::optional<std::string> title;
+
+  // TODO(jacqueline): It would be nice to use shared_ptr's for the artist and
+  // album, since there's likely a fair number of duplicates for each
+  // (especially the former).
+
   std::optional<std::string> artist;
   std::optional<std::string> album;
+
+  /*
+   * Returns a hash of the 'identifying' tags of this song. That is, a hash that
+   * can be used to determine if one song is likely the same as another, across
+   * things like re-encoding, re-mastering, or moving the underlying file.
+   */
   auto Hash() const -> uint64_t;
-};
 
-auto ReadAndParseTags(const std::string& path, SongTags* out) -> bool;
+  bool operator==(const SongTags&) const = default;
+};
 
+/*
+ * Immutable owning container for all of the metadata we store for a particular
+ * song. This includes two main kinds of metadata:
+ *  1. static(ish) attributes, such as the id, path on disk, hash of the tags
+ *  2. dynamic attributes, such as the number of times this song has been
+ *  played.
+ *
+ * Because a SongData is immutable, it is thread safe but will not reflect any
+ * changes to the dynamic attributes that may happen after it was obtained.
+ *
+ * Songs may be 'tombstoned'; this indicates that the song is no longer present
+ * at its previous location on disk, and we do not have any existing files with
+ * a matching tags_hash. When this is the case, we ignore this SongData for most
+ * purposes. We keep the entry in our database so that we can properly restore
+ * dynamic attributes (such as play count) if the song later re-appears on disk.
+ */
 class SongData {
  private:
   const SongId id_;
@@ -33,12 +81,14 @@ class SongData {
   const bool is_tombstoned_;
 
  public:
+  /* Constructor used when adding new songs to the database. */
   SongData(SongId id, const std::string& path, uint64_t hash)
       : id_(id),
         filepath_(path),
         tags_hash_(hash),
         play_count_(0),
         is_tombstoned_(false) {}
+
   SongData(SongId id,
            const std::string& path,
            uint64_t hash,
@@ -57,12 +107,30 @@ class SongData {
   auto is_tombstoned() const -> bool { return is_tombstoned_; }
 
   auto UpdateHash(uint64_t new_hash) const -> SongData;
+
+  /*
+   * Marks this song data as a 'tombstone'. Tombstoned songs are not playable,
+   * and should not generally be shown to users.
+   */
   auto Entomb() const -> SongData;
+
+  /*
+   * Clears the tombstone bit of this song, and updates the path to reflect its
+   * new location.
+   */
   auto Exhume(const std::string& new_path) const -> SongData;
 
   bool operator==(const SongData&) const = default;
 };
 
+/*
+ * Immutable and owning combination of a song's tags and metadata.
+ *
+ * Note that instances of this class may have a fairly large memory impact, due
+ * to the large number of strings they own. Prefer to query the database again
+ * (which has its own caching layer), rather than retaining Song instances for a
+ * long time.
+ */
 class Song {
  public:
   Song(SongData data, SongTags tags) : data_(data), tags_(tags) {}
@@ -70,6 +138,8 @@ class Song {
   auto data() -> const SongData& { return data_; }
   auto tags() -> const SongTags& { return tags_; }
 
+  bool operator==(const Song&) const = default;
+
  private:
   const SongData data_;
   const SongTags tags_;
diff --git a/src/database/include/tag_parser.hpp b/src/database/include/tag_parser.hpp
new file mode 100644
index 00000000..1aee94c7
--- /dev/null
+++ b/src/database/include/tag_parser.hpp
@@ -0,0 +1,22 @@
+#pragma once
+
+#include <string>
+
+#include "song.hpp"
+
+namespace database {
+
+class ITagParser {
+ public:
+  virtual ~ITagParser() {}
+  virtual auto ReadAndParseTags(const std::string& path, SongTags* out)
+      -> bool = 0;
+};
+
+class TagParserImpl : public ITagParser {
+ public:
+  virtual auto ReadAndParseTags(const std::string& path, SongTags* out)
+      -> bool override;
+};
+
+}  // namespace database
diff --git a/src/database/records.cpp b/src/database/records.cpp
index e75e2316..572850d2 100644
--- a/src/database/records.cpp
+++ b/src/database/records.cpp
@@ -1,12 +1,13 @@
 #include "records.hpp"
 
-#include <cbor.h>
-#include <esp_log.h>
 #include <stdint.h>
 
 #include <sstream>
 #include <vector>
 
+#include "cbor.h"
+#include "esp_log.h"
+
 #include "song.hpp"
 
 namespace database {
@@ -17,15 +18,29 @@ static const char kDataPrefix = 'D';
 static const char kHashPrefix = 'H';
 static const char kFieldSeparator = '\0';
 
+/*
+ * Helper function for allocating an appropriately-sized byte buffer, then
+ * encoding data into it.
+ *
+ * 'T' should be a callable that takes a CborEncoder* as
+ * an argument, and stores values within that encoder. 'T' will be called
+ * exactly twice: first to detemine the buffer size, and then second to do the
+ * encoding.
+ *
+ * 'out_buf' will be set to the location of newly allocated memory; it is up to
+ * the caller to free it. Returns the size of 'out_buf'.
+ */
 template <typename T>
 auto cbor_encode(uint8_t** out_buf, T fn) -> std::size_t {
+  // First pass: work out how many bytes we will encode into.
   CborEncoder size_encoder;
   cbor_encoder_init(&size_encoder, NULL, 0, 0);
   std::invoke(fn, &size_encoder);
   std::size_t buf_size = cbor_encoder_get_extra_bytes_needed(&size_encoder);
-  *out_buf = new uint8_t[buf_size];
 
+  // Second pass: do the encoding.
   CborEncoder encoder;
+  *out_buf = new uint8_t[buf_size];
   cbor_encoder_init(&encoder, *out_buf, buf_size, 0);
   std::invoke(fn, &encoder);
 
@@ -99,13 +114,13 @@ auto ParseDataValue(const leveldb::Slice& slice) -> std::optional<SongData> {
   CborError err;
   err = cbor_parser_init(reinterpret_cast<const uint8_t*>(slice.data()),
                          slice.size(), 0, &parser, &container);
-  if (err != CborNoError) {
+  if (err != CborNoError || !cbor_value_is_container(&container)) {
     return {};
   }
 
   CborValue val;
   err = cbor_value_enter_container(&container, &val);
-  if (err != CborNoError) {
+  if (err != CborNoError || !cbor_value_is_unsigned_integer(&val)) {
     return {};
   }
 
@@ -116,14 +131,14 @@ auto ParseDataValue(const leveldb::Slice& slice) -> std::optional<SongData> {
   }
   SongId id = raw_int;
   err = cbor_value_advance(&val);
-  if (err != CborNoError) {
+  if (err != CborNoError || !cbor_value_is_text_string(&val)) {
     return {};
   }
 
   char* raw_path;
   std::size_t len;
   err = cbor_value_dup_text_string(&val, &raw_path, &len, &val);
-  if (err != CborNoError) {
+  if (err != CborNoError || !cbor_value_is_unsigned_integer(&val)) {
     return {};
   }
   std::string path(raw_path, len);
@@ -135,7 +150,7 @@ auto ParseDataValue(const leveldb::Slice& slice) -> std::optional<SongData> {
   }
   uint64_t hash = raw_int;
   err = cbor_value_advance(&val);
-  if (err != CborNoError) {
+  if (err != CborNoError || !cbor_value_is_unsigned_integer(&val)) {
     return {};
   }
 
@@ -145,7 +160,7 @@ auto ParseDataValue(const leveldb::Slice& slice) -> std::optional<SongData> {
   }
   uint32_t play_count = raw_int;
   err = cbor_value_advance(&val);
-  if (err != CborNoError) {
+  if (err != CborNoError || !cbor_value_is_boolean(&val)) {
     return {};
   }
 
@@ -190,11 +205,14 @@ auto SongIdToBytes(SongId id) -> OwningSlice {
   return OwningSlice(as_str);
 }
 
-auto BytesToSongId(const std::string& bytes) -> SongId {
+auto BytesToSongId(const std::string& bytes) -> std::optional<SongId> {
   CborParser parser;
   CborValue val;
   cbor_parser_init(reinterpret_cast<const uint8_t*>(bytes.data()), bytes.size(),
                    0, &parser, &val);
+  if (!cbor_value_is_unsigned_integer(&val)) {
+    return {};
+  }
   uint64_t raw_id;
   cbor_value_get_uint64(&val, &raw_id);
   return raw_id;
diff --git a/src/database/song.cpp b/src/database/song.cpp
index 32507fa2..a0e6ce4b 100644
--- a/src/database/song.cpp
+++ b/src/database/song.cpp
@@ -1,113 +1,21 @@
 #include "song.hpp"
 
-#include <esp_log.h>
-#include <ff.h>
 #include <komihash.h>
-#include <tags.h>
 
 namespace database {
 
-namespace libtags {
-
-struct Aux {
-  FIL file;
-  FILINFO info;
-  SongTags* tags;
-};
-
-static int read(Tagctx* ctx, void* buf, int cnt) {
-  Aux* aux = reinterpret_cast<Aux*>(ctx->aux);
-  UINT bytes_read;
-  if (f_read(&aux->file, buf, cnt, &bytes_read) != FR_OK) {
-    return -1;
-  }
-  return bytes_read;
-}
-
-static int seek(Tagctx* ctx, int offset, int whence) {
-  Aux* aux = reinterpret_cast<Aux*>(ctx->aux);
-  FRESULT res;
-  if (whence == 0) {
-    // Seek from the start of the file. This is f_lseek's behaviour.
-    res = f_lseek(&aux->file, offset);
-  } else if (whence == 1) {
-    // Seek from current offset.
-    res = f_lseek(&aux->file, aux->file.fptr + offset);
-  } else if (whence == 2) {
-    // Seek from the end of the file
-    res = f_lseek(&aux->file, aux->info.fsize + offset);
-  } else {
-    return -1;
-  }
-  return res;
-}
-
-static void tag(Tagctx* ctx,
-                int t,
-                const char* k,
-                const char* v,
-                int offset,
-                int size,
-                Tagread f) {
-  Aux* aux = reinterpret_cast<Aux*>(ctx->aux);
-  if (t == Ttitle) {
-    aux->tags->title = v;
-  } else if (t == Tartist) {
-    aux->tags->artist = v;
-  } else if (t == Talbum) {
-    aux->tags->album = v;
-  }
-}
-
-static void toc(Tagctx* ctx, int ms, int offset) {}
-
-}  // namespace libtags
-
-static const std::size_t kBufSize = 1024;
-static const char* kTag = "TAGS";
-
-auto ReadAndParseTags(const std::string& path, SongTags* out) -> bool {
-  libtags::Aux aux;
-  aux.tags = out;
-  if (f_stat(path.c_str(), &aux.info) != FR_OK ||
-      f_open(&aux.file, path.c_str(), FA_READ) != FR_OK) {
-    ESP_LOGW(kTag, "failed to open file %s", path.c_str());
-    return false;
-  }
-  // Fine to have this on the stack; this is only called on the leveldb task.
-  char buf[kBufSize];
-  Tagctx ctx;
-  ctx.read = libtags::read;
-  ctx.seek = libtags::seek;
-  ctx.tag = libtags::tag;
-  ctx.toc = libtags::toc;
-  ctx.aux = &aux;
-  ctx.buf = buf;
-  ctx.bufsz = kBufSize;
-  int res = tagsget(&ctx);
-  f_close(&aux.file);
-
-  if (res != 0) {
-    // Parsing failed.
-    return false;
-  }
-
-  switch (ctx.format) {
-    case Fmp3:
-      out->encoding = ENC_MP3;
-      break;
-    default:
-      out->encoding = ENC_UNSUPPORTED;
-  }
-
-  return true;
-}
-
+/* Helper function to update a komihash stream with a std::string. */
 auto HashString(komihash_stream_t* stream, std::string str) -> void {
   komihash_stream_update(stream, str.c_str(), str.length());
 }
 
+/*
+ * Uses a komihash stream to incrementally hash tags. This lowers the function's
+ * memory footprint a little so that it's safe to call from any stack.
+ */
 auto SongTags::Hash() const -> uint64_t {
+  // TODO(jacqueline): this function doesn't work very well for songs with no
+  // tags at all.
   komihash_stream_t stream;
   komihash_stream_init(&stream, 0);
   HashString(&stream, title.value_or(""));
diff --git a/src/database/tag_parser.cpp b/src/database/tag_parser.cpp
new file mode 100644
index 00000000..90866ff3
--- /dev/null
+++ b/src/database/tag_parser.cpp
@@ -0,0 +1,107 @@
+#include "tag_parser.hpp"
+
+#include <esp_log.h>
+#include <ff.h>
+#include <tags.h>
+
+namespace database {
+
+namespace libtags {
+
+struct Aux {
+  FIL file;
+  FILINFO info;
+  SongTags* tags;
+};
+
+static int read(Tagctx* ctx, void* buf, int cnt) {
+  Aux* aux = reinterpret_cast<Aux*>(ctx->aux);
+  UINT bytes_read;
+  if (f_read(&aux->file, buf, cnt, &bytes_read) != FR_OK) {
+    return -1;
+  }
+  return bytes_read;
+}
+
+static int seek(Tagctx* ctx, int offset, int whence) {
+  Aux* aux = reinterpret_cast<Aux*>(ctx->aux);
+  FRESULT res;
+  if (whence == 0) {
+    // Seek from the start of the file. This is f_lseek's behaviour.
+    res = f_lseek(&aux->file, offset);
+  } else if (whence == 1) {
+    // Seek from current offset.
+    res = f_lseek(&aux->file, aux->file.fptr + offset);
+  } else if (whence == 2) {
+    // Seek from the end of the file
+    res = f_lseek(&aux->file, aux->info.fsize + offset);
+  } else {
+    return -1;
+  }
+  return res;
+}
+
+static void tag(Tagctx* ctx,
+                int t,
+                const char* k,
+                const char* v,
+                int offset,
+                int size,
+                Tagread f) {
+  Aux* aux = reinterpret_cast<Aux*>(ctx->aux);
+  if (t == Ttitle) {
+    aux->tags->title = v;
+  } else if (t == Tartist) {
+    aux->tags->artist = v;
+  } else if (t == Talbum) {
+    aux->tags->album = v;
+  }
+}
+
+static void toc(Tagctx* ctx, int ms, int offset) {}
+
+}  // namespace libtags
+
+static const std::size_t kBufSize = 1024;
+static const char* kTag = "TAGS";
+
+auto TagParserImpl::ReadAndParseTags(const std::string& path, SongTags* out)
+    -> bool {
+  libtags::Aux aux;
+  aux.tags = out;
+  if (f_stat(path.c_str(), &aux.info) != FR_OK ||
+      f_open(&aux.file, path.c_str(), FA_READ) != FR_OK) {
+    ESP_LOGW(kTag, "failed to open file %s", path.c_str());
+    return false;
+  }
+  // Fine to have this on the stack; this is only called on tasks with large
+  // stacks anyway, due to all the string handling.
+  char buf[kBufSize];
+  Tagctx ctx;
+  ctx.read = libtags::read;
+  ctx.seek = libtags::seek;
+  ctx.tag = libtags::tag;
+  ctx.toc = libtags::toc;
+  ctx.aux = &aux;
+  ctx.buf = buf;
+  ctx.bufsz = kBufSize;
+  int res = tagsget(&ctx);
+  f_close(&aux.file);
+
+  if (res != 0) {
+    // Parsing failed.
+    return false;
+  }
+
+  switch (ctx.format) {
+    case Fmp3:
+      out->encoding = Encoding::kMp3;
+      break;
+    default:
+      out->encoding = Encoding::kUnsupported;
+  }
+
+  return true;
+}
+
+}  // namespace database
diff --git a/src/database/test/CMakeLists.txt b/src/database/test/CMakeLists.txt
index af42a78a..b5532da1 100644
--- a/src/database/test/CMakeLists.txt
+++ b/src/database/test/CMakeLists.txt
@@ -1,4 +1,4 @@
 idf_component_register(
-  SRCS "test_records.cpp"
+  SRCS "test_records.cpp" "test_database.cpp"
   INCLUDE_DIRS "."
-  REQUIRES catch2 cmock database)
+  REQUIRES catch2 cmock database drivers fixtures)
diff --git a/src/database/test/test_database.cpp b/src/database/test/test_database.cpp
new file mode 100644
index 00000000..d61421ee
--- /dev/null
+++ b/src/database/test/test_database.cpp
@@ -0,0 +1,159 @@
+#include "database.hpp"
+
+#include <stdint.h>
+#include <iomanip>
+#include <map>
+#include <memory>
+#include <string>
+
+#include "catch2/catch.hpp"
+#include "driver_cache.hpp"
+#include "esp_log.h"
+#include "file_gatherer.hpp"
+#include "i2c_fixture.hpp"
+#include "leveldb/db.h"
+#include "song.hpp"
+#include "spi_fixture.hpp"
+#include "tag_parser.hpp"
+
+namespace database {
+
+class TestBackends : public IFileGatherer, public ITagParser {
+ public:
+  std::map<std::string, SongTags> songs;
+
+  auto MakeSong(const std::string& path, const std::string& title) -> void {
+    SongTags tags;
+    tags.encoding = Encoding::kMp3;
+    tags.title = title;
+    songs[path] = tags;
+  }
+
+  auto FindFiles(const std::string& root,
+                 std::function<void(const std::string&)> cb) -> void override {
+    for (auto keyval : songs) {
+      std::invoke(cb, keyval.first);
+    }
+  }
+
+  auto ReadAndParseTags(const std::string& path, SongTags* out)
+      -> bool override {
+    if (songs.contains(path)) {
+      *out = songs.at(path);
+      return true;
+    }
+    return false;
+  }
+};
+
+TEST_CASE("song database", "[integration]") {
+  I2CFixture i2c;
+  SpiFixture spi;
+  drivers::DriverCache drivers;
+  auto storage = drivers.AcquireStorage();
+
+  Database::Destroy();
+
+  TestBackends songs;
+  auto open_res = Database::Open(&songs, &songs);
+  REQUIRE(open_res.has_value());
+  std::unique_ptr<Database> db(open_res.value());
+
+  SECTION("empty database") {
+    std::unique_ptr<std::vector<Song>> res(db->GetSongs(10).get().values());
+    REQUIRE(res->size() == 0);
+  }
+
+  SECTION("add new songs") {
+    songs.MakeSong("song1.mp3", "Song 1");
+    songs.MakeSong("song2.wav", "Song 2");
+    songs.MakeSong("song3.exe", "Song 3");
+
+    db->Update();
+
+    std::unique_ptr<std::vector<Song>> res(db->GetSongs(10).get().values());
+    REQUIRE(res->size() == 3);
+    CHECK(*res->at(0).tags().title == "Song 1");
+    CHECK(res->at(0).data().id() == 1);
+    CHECK(*res->at(1).tags().title == "Song 2");
+    CHECK(res->at(1).data().id() == 2);
+    CHECK(*res->at(2).tags().title == "Song 3");
+    CHECK(res->at(2).data().id() == 3);
+
+    SECTION("update with no filesystem changes") {
+      db->Update();
+
+      std::unique_ptr<std::vector<Song>> new_res(
+          db->GetSongs(10).get().values());
+      REQUIRE(new_res->size() == 3);
+      CHECK(res->at(0) == new_res->at(0));
+      CHECK(res->at(1) == new_res->at(1));
+      CHECK(res->at(2) == new_res->at(2));
+    }
+
+    SECTION("update with all songs gone") {
+      songs.songs.clear();
+
+      db->Update();
+
+      std::unique_ptr<std::vector<Song>> new_res(
+          db->GetSongs(10).get().values());
+      CHECK(new_res->size() == 0);
+
+      SECTION("update with one song returned") {
+        songs.MakeSong("song2.wav", "Song 2");
+
+        db->Update();
+
+        std::unique_ptr<std::vector<Song>> new_res(
+            db->GetSongs(10).get().values());
+        REQUIRE(new_res->size() == 1);
+        CHECK(res->at(1) == new_res->at(0));
+      }
+    }
+
+    SECTION("update with one song gone") {
+      songs.songs.erase("song2.wav");
+
+      db->Update();
+
+      std::unique_ptr<std::vector<Song>> new_res(
+          db->GetSongs(10).get().values());
+      REQUIRE(new_res->size() == 2);
+      CHECK(res->at(0) == new_res->at(0));
+      CHECK(res->at(2) == new_res->at(1));
+    }
+
+    SECTION("update with tags changed") {
+      songs.MakeSong("song3.exe", "The Song 3");
+
+      db->Update();
+
+      std::unique_ptr<std::vector<Song>> new_res(
+          db->GetSongs(10).get().values());
+      REQUIRE(new_res->size() == 3);
+      CHECK(res->at(0) == new_res->at(0));
+      CHECK(res->at(1) == new_res->at(1));
+      CHECK(*new_res->at(2).tags().title == "The Song 3");
+      // The id should not have changed, since this was just a tag update.
+      CHECK(res->at(2).data().id() == new_res->at(2).data().id());
+    }
+
+    SECTION("update with one new song") {
+      songs.MakeSong("my song.midi", "Song 1 (nightcore remix)");
+
+      db->Update();
+
+      std::unique_ptr<std::vector<Song>> new_res(
+          db->GetSongs(10).get().values());
+      REQUIRE(new_res->size() == 4);
+      CHECK(res->at(0) == new_res->at(0));
+      CHECK(res->at(1) == new_res->at(1));
+      CHECK(res->at(2) == new_res->at(2));
+      CHECK(*new_res->at(3).tags().title == "Song 1 (nightcore remix)");
+      CHECK(new_res->at(3).data().id() == 4);
+    }
+  }
+}
+
+}  // namespace database
diff --git a/src/database/test/test_records.cpp b/src/database/test/test_records.cpp
index 7c08e335..d84d2b6a 100644
--- a/src/database/test/test_records.cpp
+++ b/src/database/test/test_records.cpp
@@ -38,7 +38,7 @@ TEST_CASE("database record encoding", "[unit]") {
     }
 
     SECTION("round-trips") {
-      CHECK(BytesToSongId(as_bytes.data) == id);
+      CHECK(*BytesToSongId(as_bytes.data) == id);
     }
 
     SECTION("encodes compactly") {
@@ -47,6 +47,12 @@ TEST_CASE("database record encoding", "[unit]") {
 
       CHECK(small_id.data.size() < large_id.data.size());
     }
+
+    SECTION("decoding rejects garbage") {
+      std::optional<SongId> res = BytesToSongId("i'm gay");
+
+      CHECK(res.has_value() == false);
+    }
   }
 
   SECTION("data keys") {
@@ -95,6 +101,12 @@ TEST_CASE("database record encoding", "[unit]") {
     SECTION("round-trips") {
       CHECK(ParseDataValue(enc.slice) == data);
     }
+
+    SECTION("decoding rejects garbage") {
+      std::optional<SongData> res = ParseDataValue("hi!");
+
+      CHECK(res.has_value() == false);
+    }
   }
 
   SECTION("hash keys") {
@@ -116,6 +128,12 @@ TEST_CASE("database record encoding", "[unit]") {
     SECTION("round-trips") {
       CHECK(ParseHashValue(val.slice) == 123456);
     }
+
+    SECTION("decoding rejects garbage") {
+      std::optional<SongId> res = ParseHashValue("the first song :)");
+
+      CHECK(res.has_value() == false);
+    }
   }
 }
author	jacqueline <me@jacqueline.id.au>	2023-05-12 10:30:07 +1000
committer	jacqueline <me@jacqueline.id.au>	2023-05-12 10:30:07 +1000
commit	961c8014ada037712e8c72f23430362e9f14c1ec (patch)
tree	ce13e0a00fc0d0318d46e6dfbecf2360b4cc5e14 /src/database
parent	10eb120878e01579bff2fdfab7bef59639b21155 (diff)
download	tangara-fw-961c8014ada037712e8c72f23430362e9f14c1ec.tar.gz