summaryrefslogtreecommitdiff
path: root/src/database
diff options
context:
space:
mode:
Diffstat (limited to 'src/database')
-rw-r--r--src/database/CMakeLists.txt4
-rw-r--r--src/database/database.cpp249
-rw-r--r--src/database/include/database.hpp26
-rw-r--r--src/database/include/index.hpp72
-rw-r--r--src/database/include/records.hpp32
-rw-r--r--src/database/include/track.hpp37
-rw-r--r--src/database/index.cpp88
-rw-r--r--src/database/records.cpp211
-rw-r--r--src/database/tag_parser.cpp36
-rw-r--r--src/database/track.cpp39
10 files changed, 687 insertions, 107 deletions
diff --git a/src/database/CMakeLists.txt b/src/database/CMakeLists.txt
index e7b1f62c..04e1d5d8 100644
--- a/src/database/CMakeLists.txt
+++ b/src/database/CMakeLists.txt
@@ -3,9 +3,9 @@
# SPDX-License-Identifier: GPL-3.0-only
idf_component_register(
- SRCS "env_esp.cpp" "database.cpp" "track.cpp" "records.cpp" "file_gatherer.cpp" "tag_parser.cpp"
+ SRCS "env_esp.cpp" "database.cpp" "track.cpp" "records.cpp" "file_gatherer.cpp" "tag_parser.cpp" "index.cpp"
INCLUDE_DIRS "include"
- REQUIRES "result" "span" "esp_psram" "fatfs" "libtags" "komihash" "cbor" "tasks")
+ REQUIRES "result" "span" "esp_psram" "fatfs" "libtags" "komihash" "cbor" "tasks" "shared_string")
target_compile_options(${COMPONENT_LIB} PRIVATE ${EXTRA_WARNINGS})
diff --git a/src/database/database.cpp b/src/database/database.cpp
index ac5e4873..1ac5d729 100644
--- a/src/database/database.cpp
+++ b/src/database/database.cpp
@@ -13,11 +13,13 @@
#include <functional>
#include <iomanip>
#include <memory>
+#include <optional>
#include <sstream>
#include "esp_log.h"
#include "ff.h"
#include "freertos/projdefs.h"
+#include "index.hpp"
#include "leveldb/cache.h"
#include "leveldb/db.h"
#include "leveldb/iterator.h"
@@ -130,72 +132,91 @@ Database::~Database() {
auto Database::Update() -> std::future<void> {
return worker_task_->Dispatch<void>([&]() -> void {
- // Stage 1: verify all existing tracks are still valid.
- ESP_LOGI(kTag, "verifying existing tracks");
- const leveldb::Snapshot* snapshot = db_->GetSnapshot();
leveldb::ReadOptions read_options;
read_options.fill_cache = false;
- read_options.snapshot = snapshot;
- leveldb::Iterator* it = db_->NewIterator(read_options);
- OwningSlice prefix = CreateDataPrefix();
- it->Seek(prefix.slice);
- while (it->Valid() && it->key().starts_with(prefix.slice)) {
- std::optional<TrackData> track = ParseDataValue(it->value());
- if (!track) {
- // The value was malformed. Drop this record.
- ESP_LOGW(kTag, "dropping malformed metadata");
+
+ // Stage 0: discard indexes
+ // TODO(jacqueline): I think it should be possible to incrementally update
+ // indexes, but my brain hurts.
+ ESP_LOGI(kTag, "dropping stale indexes");
+ {
+ leveldb::Iterator* it = db_->NewIterator(read_options);
+ OwningSlice prefix = EncodeAllIndexesPrefix();
+ it->Seek(prefix.slice);
+ while (it->Valid() && it->key().starts_with(prefix.slice)) {
db_->Delete(leveldb::WriteOptions(), it->key());
it->Next();
- continue;
}
+ }
- if (track->is_tombstoned()) {
- ESP_LOGW(kTag, "skipping tombstoned %lx", track->id());
- it->Next();
- continue;
- }
+ // Stage 1: verify all existing tracks are still valid.
+ ESP_LOGI(kTag, "verifying existing tracks");
+ {
+ leveldb::Iterator* it = db_->NewIterator(read_options);
+ OwningSlice prefix = EncodeDataPrefix();
+ it->Seek(prefix.slice);
+ while (it->Valid() && it->key().starts_with(prefix.slice)) {
+ std::optional<TrackData> track = ParseDataValue(it->value());
+ if (!track) {
+ // The value was malformed. Drop this record.
+ ESP_LOGW(kTag, "dropping malformed metadata");
+ db_->Delete(leveldb::WriteOptions(), it->key());
+ it->Next();
+ continue;
+ }
- TrackTags tags;
- if (!tag_parser_->ReadAndParseTags(track->filepath(), &tags) ||
- tags.encoding == Encoding::kUnsupported) {
- // We couldn't read the tags for this track. Either they were
- // malformed, or perhaps the file is missing. Either way, tombstone
- // this record.
- ESP_LOGW(kTag, "entombing missing #%lx", track->id());
- dbPutTrackData(track->Entomb());
- it->Next();
- continue;
- }
+ if (track->is_tombstoned()) {
+ ESP_LOGW(kTag, "skipping tombstoned %lx", track->id());
+ it->Next();
+ continue;
+ }
- uint64_t new_hash = tags.Hash();
- if (new_hash != track->tags_hash()) {
- // This track's tags have changed. Since the filepath is exactly the
- // same, we assume this is a legitimate correction. Update the
- // database.
- ESP_LOGI(kTag, "updating hash (%llx -> %llx)", track->tags_hash(),
- new_hash);
- dbPutTrackData(track->UpdateHash(new_hash));
- dbPutHash(new_hash, track->id());
- }
+ TrackTags tags{};
+ if (!tag_parser_->ReadAndParseTags(track->filepath(), &tags) ||
+ tags.encoding() == Encoding::kUnsupported) {
+ // We couldn't read the tags for this track. Either they were
+ // malformed, or perhaps the file is missing. Either way, tombstone
+ // this record.
+ ESP_LOGW(kTag, "entombing missing #%lx", track->id());
+ dbPutTrackData(track->Entomb());
+ it->Next();
+ continue;
+ }
- it->Next();
+ // At this point, we know that the track still exists in its original
+ // location. All that's left to do is update any metadata about it.
+
+ uint64_t new_hash = tags.Hash();
+ if (new_hash != track->tags_hash()) {
+ // This track's tags have changed. Since the filepath is exactly the
+ // same, we assume this is a legitimate correction. Update the
+ // database.
+ ESP_LOGI(kTag, "updating hash (%llx -> %llx)", track->tags_hash(),
+ new_hash);
+ dbPutTrackData(track->UpdateHash(new_hash));
+ dbPutHash(new_hash, track->id());
+ }
+
+ dbCreateIndexesForTrack({*track, tags});
+
+ it->Next();
+ }
+ delete it;
}
- delete it;
- db_->ReleaseSnapshot(snapshot);
// Stage 2: search for newly added files.
ESP_LOGI(kTag, "scanning for new tracks");
file_gatherer_->FindFiles("", [&](const std::string& path) {
TrackTags tags;
if (!tag_parser_->ReadAndParseTags(path, &tags) ||
- tags.encoding == Encoding::kUnsupported) {
+ tags.encoding() == Encoding::kUnsupported) {
// No parseable tags; skip this fiile.
return;
}
// Check for any existing record with the same hash.
uint64_t hash = tags.Hash();
- OwningSlice key = CreateHashKey(hash);
+ OwningSlice key = EncodeHashKey(hash);
std::optional<TrackId> existing_hash;
std::string raw_entry;
if (db_->Get(leveldb::ReadOptions(), key.slice, &raw_entry).ok()) {
@@ -207,7 +228,11 @@ auto Database::Update() -> std::future<void> {
// malformed. Either way, record this as a new track.
TrackId id = dbMintNewTrackId();
ESP_LOGI(kTag, "recording new 0x%lx", id);
- dbPutTrack(id, path, hash);
+
+ TrackData data(id, path, hash);
+ dbPutTrackData(data);
+ dbPutHash(hash, id);
+ dbCreateIndexesForTrack({data, tags});
return;
}
@@ -216,12 +241,14 @@ auto Database::Update() -> std::future<void> {
// We found a hash that matches, but there's no data record? Weird.
TrackData new_data(*existing_hash, path, hash);
dbPutTrackData(new_data);
+ dbCreateIndexesForTrack({*existing_data, tags});
return;
}
if (existing_data->is_tombstoned()) {
ESP_LOGI(kTag, "exhuming track %lu", existing_data->id());
dbPutTrackData(existing_data->Exhume(path));
+ dbCreateIndexesForTrack({*existing_data, tags});
} else if (existing_data->filepath() != path) {
ESP_LOGW(kTag, "tag hash collision");
}
@@ -241,11 +268,41 @@ auto Database::GetTrackPath(TrackId id)
});
}
+auto Database::GetIndexes() -> std::vector<IndexInfo> {
+ // TODO(jacqueline): This probably needs to be async? When we have runtime
+ // configurable indexes, they will need to come from somewhere.
+ return {
+ kAllTracks,
+ kAlbumsByArtist,
+ kTracksByGenre,
+ };
+}
+
+auto Database::GetTracksByIndex(const IndexInfo& index, std::size_t page_size)
+ -> std::future<Result<IndexRecord>*> {
+ return worker_task_->Dispatch<Result<IndexRecord>*>(
+ [=, this]() -> Result<IndexRecord>* {
+ IndexKey::Header header{
+ .id = index.id,
+ .depth = 0,
+ .components_hash = 0,
+ };
+ OwningSlice prefix = EncodeIndexPrefix(header);
+ Continuation<IndexRecord> c{.iterator = nullptr,
+ .prefix = prefix.data,
+ .start_key = prefix.data,
+ .forward = true,
+ .was_prev_forward = true,
+ .page_size = page_size};
+ return dbGetPage(c);
+ });
+}
+
auto Database::GetTracks(std::size_t page_size) -> std::future<Result<Track>*> {
return worker_task_->Dispatch<Result<Track>*>([=, this]() -> Result<Track>* {
Continuation<Track> c{.iterator = nullptr,
- .prefix = CreateDataPrefix().data,
- .start_key = CreateDataPrefix().data,
+ .prefix = EncodeDataPrefix().data,
+ .start_key = EncodeDataPrefix().data,
.forward = true,
.was_prev_forward = true,
.page_size = page_size};
@@ -276,6 +333,8 @@ auto Database::GetPage(Continuation<T>* c) -> std::future<Result<T>*> {
template auto Database::GetPage<Track>(Continuation<Track>* c)
-> std::future<Result<Track>*>;
+template auto Database::GetPage<IndexRecord>(Continuation<IndexRecord>* c)
+ -> std::future<Result<IndexRecord>*>;
template auto Database::GetPage<std::string>(Continuation<std::string>* c)
-> std::future<Result<std::string>*>;
@@ -300,23 +359,23 @@ auto Database::dbMintNewTrackId() -> TrackId {
}
auto Database::dbEntomb(TrackId id, uint64_t hash) -> void {
- OwningSlice key = CreateHashKey(hash);
- OwningSlice val = CreateHashValue(id);
+ OwningSlice key = EncodeHashKey(hash);
+ OwningSlice val = EncodeHashValue(id);
if (!db_->Put(leveldb::WriteOptions(), key.slice, val.slice).ok()) {
ESP_LOGE(kTag, "failed to entomb #%llx (id #%lx)", hash, id);
}
}
auto Database::dbPutTrackData(const TrackData& s) -> void {
- OwningSlice key = CreateDataKey(s.id());
- OwningSlice val = CreateDataValue(s);
+ OwningSlice key = EncodeDataKey(s.id());
+ OwningSlice val = EncodeDataValue(s);
if (!db_->Put(leveldb::WriteOptions(), key.slice, val.slice).ok()) {
ESP_LOGE(kTag, "failed to write data for #%lx", s.id());
}
}
auto Database::dbGetTrackData(TrackId id) -> std::optional<TrackData> {
- OwningSlice key = CreateDataKey(id);
+ OwningSlice key = EncodeDataKey(id);
std::string raw_val;
if (!db_->Get(leveldb::ReadOptions(), key.slice, &raw_val).ok()) {
ESP_LOGW(kTag, "no key found for #%lx", id);
@@ -326,15 +385,15 @@ auto Database::dbGetTrackData(TrackId id) -> std::optional<TrackData> {
}
auto Database::dbPutHash(const uint64_t& hash, TrackId i) -> void {
- OwningSlice key = CreateHashKey(hash);
- OwningSlice val = CreateHashValue(i);
+ OwningSlice key = EncodeHashKey(hash);
+ OwningSlice val = EncodeHashValue(i);
if (!db_->Put(leveldb::WriteOptions(), key.slice, val.slice).ok()) {
ESP_LOGE(kTag, "failed to write hash for #%lx", i);
}
}
auto Database::dbGetHash(const uint64_t& hash) -> std::optional<TrackId> {
- OwningSlice key = CreateHashKey(hash);
+ OwningSlice key = EncodeHashKey(hash);
std::string raw_val;
if (!db_->Get(leveldb::ReadOptions(), key.slice, &raw_val).ok()) {
ESP_LOGW(kTag, "no key found for hash #%llx", hash);
@@ -343,11 +402,13 @@ auto Database::dbGetHash(const uint64_t& hash) -> std::optional<TrackId> {
return ParseHashValue(raw_val);
}
-auto Database::dbPutTrack(TrackId id,
- const std::string& path,
- const uint64_t& hash) -> void {
- dbPutTrackData(TrackData(id, path, hash));
- dbPutHash(hash, id);
+auto Database::dbCreateIndexesForTrack(Track track) -> void {
+ for (const IndexInfo& index : GetIndexes()) {
+ leveldb::WriteBatch writes;
+ if (Index(index, track, &writes)) {
+ db_->Write(leveldb::WriteOptions(), &writes);
+ }
+ }
}
template <typename T>
@@ -475,6 +536,31 @@ template auto Database::dbGetPage<std::string>(
const Continuation<std::string>& c) -> Result<std::string>*;
template <>
+auto Database::ParseRecord<IndexRecord>(const leveldb::Slice& key,
+ const leveldb::Slice& val)
+ -> std::optional<IndexRecord> {
+ std::optional<IndexKey> data = ParseIndexKey(key);
+ if (!data) {
+ return {};
+ }
+
+ // If there was a track id included for this key, then this is a leaf record.
+ // Fetch the actual track data instead of relying on the information in the
+ // key.
+ std::optional<Track> track;
+ if (data->track) {
+ std::optional<TrackData> track_data = dbGetTrackData(*data->track);
+ TrackTags track_tags;
+ if (track_data &&
+ tag_parser_->ReadAndParseTags(track_data->filepath(), &track_tags)) {
+ track.emplace(*track_data, track_tags);
+ }
+ }
+
+ return IndexRecord(*data, track);
+}
+
+template <>
auto Database::ParseRecord<Track>(const leveldb::Slice& key,
const leveldb::Slice& val)
-> std::optional<Track> {
@@ -510,13 +596,46 @@ auto Database::ParseRecord<std::string>(const leveldb::Slice& key,
}
}
}
- stream << "\tval: 0x";
- std::string str = val.ToString();
- for (int i = 0; i < val.size(); i++) {
- stream << std::hex << std::setfill('0') << std::setw(2)
- << static_cast<int>(str[i]);
+ if (!val.empty()) {
+ stream << "\tval: 0x";
+ std::string str = val.ToString();
+ for (int i = 0; i < val.size(); i++) {
+ stream << std::hex << std::setfill('0') << std::setw(2)
+ << static_cast<int>(str[i]);
+ }
}
return stream.str();
}
+IndexRecord::IndexRecord(const IndexKey& key, std::optional<Track> track)
+ : key_(key), track_(track) {}
+
+auto IndexRecord::text() const -> std::optional<shared_string> {
+ if (track_) {
+ return track_->TitleOrFilename();
+ }
+ return key_.item;
+}
+
+auto IndexRecord::track() const -> std::optional<Track> {
+ return track_;
+}
+
+auto IndexRecord::Expand(std::size_t page_size) const
+ -> std::optional<Continuation<IndexRecord>> {
+ if (track_) {
+ return {};
+ }
+ IndexKey::Header new_header = ExpandHeader(key_.header, key_.item);
+ OwningSlice new_prefix = EncodeIndexPrefix(new_header);
+ return Continuation<IndexRecord>{
+ .iterator = nullptr,
+ .prefix = new_prefix.data,
+ .start_key = new_prefix.data,
+ .forward = true,
+ .was_prev_forward = true,
+ .page_size = page_size,
+ };
+}
+
} // namespace database
diff --git a/src/database/include/database.hpp b/src/database/include/database.hpp
index 8fecc5f6..77a17b75 100644
--- a/src/database/include/database.hpp
+++ b/src/database/include/database.hpp
@@ -16,6 +16,7 @@
#include <vector>
#include "file_gatherer.hpp"
+#include "index.hpp"
#include "leveldb/cache.h"
#include "leveldb/db.h"
#include "leveldb/iterator.h"
@@ -23,6 +24,7 @@
#include "leveldb/slice.h"
#include "records.hpp"
#include "result.hpp"
+#include "shared_string.h"
#include "tag_parser.hpp"
#include "tasks.hpp"
#include "track.hpp"
@@ -66,6 +68,20 @@ class Result {
std::optional<Continuation<T>> prev_page_;
};
+class IndexRecord {
+ public:
+ explicit IndexRecord(const IndexKey&, std::optional<Track>);
+
+ auto text() const -> std::optional<shared_string>;
+ auto track() const -> std::optional<Track>;
+
+ auto Expand(std::size_t) const -> std::optional<Continuation<IndexRecord>>;
+
+ private:
+ IndexKey key_;
+ std::optional<Track> track_;
+};
+
class Database {
public:
enum DatabaseError {
@@ -84,6 +100,9 @@ class Database {
auto GetTrackPath(TrackId id) -> std::future<std::optional<std::string>>;
+ auto GetIndexes() -> std::vector<IndexInfo>;
+ auto GetTracksByIndex(const IndexInfo& index, std::size_t page_size)
+ -> std::future<Result<IndexRecord>*>;
auto GetTracks(std::size_t page_size) -> std::future<Result<Track>*>;
auto GetDump(std::size_t page_size) -> std::future<Result<std::string>*>;
@@ -118,8 +137,7 @@ class Database {
auto dbGetTrackData(TrackId id) -> std::optional<TrackData>;
auto dbPutHash(const uint64_t& hash, TrackId i) -> void;
auto dbGetHash(const uint64_t& hash) -> std::optional<TrackId>;
- auto dbPutTrack(TrackId id, const std::string& path, const uint64_t& hash)
- -> void;
+ auto dbCreateIndexesForTrack(Track track) -> void;
template <typename T>
auto dbGetPage(const Continuation<T>& c) -> Result<T>*;
@@ -130,6 +148,10 @@ class Database {
};
template <>
+auto Database::ParseRecord<IndexRecord>(const leveldb::Slice& key,
+ const leveldb::Slice& val)
+ -> std::optional<IndexRecord>;
+template <>
auto Database::ParseRecord<Track>(const leveldb::Slice& key,
const leveldb::Slice& val)
-> std::optional<Track>;
diff --git a/src/database/include/index.hpp b/src/database/include/index.hpp
new file mode 100644
index 00000000..17229164
--- /dev/null
+++ b/src/database/include/index.hpp
@@ -0,0 +1,72 @@
+/*
+ * Copyright 2023 jacqueline <me@jacqueline.id.au>
+ *
+ * SPDX-License-Identifier: GPL-3.0-only
+ */
+
+#pragma once
+
+#include <stdint.h>
+
+#include <cstdint>
+#include <string>
+#include <variant>
+#include <vector>
+
+#include "leveldb/db.h"
+#include "leveldb/slice.h"
+
+#include "leveldb/write_batch.h"
+#include "shared_string.h"
+#include "track.hpp"
+
+namespace database {
+
+typedef uint8_t IndexId;
+
+struct IndexInfo {
+ // Unique id for this index
+ IndexId id;
+ // Localised, user-friendly description of this index. e.g. "Albums by Artist"
+ // or "All Tracks".
+ std::string name;
+ // Specifier for how this index breaks down the database.
+ std::vector<Tag> components;
+};
+
+struct IndexKey {
+ struct Header {
+ // The index that this key was created for.
+ IndexId id;
+ // The number of components of IndexInfo that have already been filtered.
+ // For example, if an index consists of { kGenre, kArtist }, and this key
+ // represents an artist, then depth = 1.
+ std::uint8_t depth;
+ // The cumulative hash of all filtered components, in order. For example, if
+ // an index consists of { kArtist, kAlbum, kTitle }, and we are at depth = 2
+ // then this may contain hash(hash("Jacqueline"), "My Cool Album").
+ std::uint64_t components_hash;
+ };
+ Header header;
+
+ // The filterable / selectable item that this key represents. "Jacqueline" for
+ // kArtist, "My Cool Album" for kAlbum, etc.
+ std::optional<std::string> item;
+ // If this is a leaf component, the track id for this record.
+ // This could reasonably be the value for a record, but we keep it as a part
+ // of the key to help with disambiguation.
+ std::optional<TrackId> track;
+};
+
+auto Index(const IndexInfo&, const Track&, leveldb::WriteBatch*) -> bool;
+auto ExpandHeader(const IndexKey::Header&, const std::optional<std::string>&)
+ -> IndexKey::Header;
+
+// Predefined indexes
+// TODO(jacqueline): Make these defined at runtime! :)
+
+extern const IndexInfo kAlbumsByArtist;
+extern const IndexInfo kTracksByGenre;
+extern const IndexInfo kAllTracks;
+
+} // namespace database
diff --git a/src/database/include/records.hpp b/src/database/include/records.hpp
index 95a1a1e8..58f29b20 100644
--- a/src/database/include/records.hpp
+++ b/src/database/include/records.hpp
@@ -9,10 +9,14 @@
#include <stdint.h>
#include <string>
+#include <variant>
+#include <vector>
#include "leveldb/db.h"
#include "leveldb/slice.h"
+#include "index.hpp"
+#include "shared_string.h"
#include "track.hpp"
namespace database {
@@ -34,39 +38,49 @@ class OwningSlice {
* Returns the prefix added to every TrackData key. This can be used to iterate
* over every data record in the database.
*/
-auto CreateDataPrefix() -> OwningSlice;
+auto EncodeDataPrefix() -> OwningSlice;
-/* Creates a data key for a track with the specified id. */
-auto CreateDataKey(const TrackId& id) -> OwningSlice;
+/* Encodes a data key for a track with the specified id. */
+auto EncodeDataKey(const TrackId& id) -> OwningSlice;
/*
* Encodes a TrackData instance into bytes, in preparation for storing it within
* the database. This encoding is consistent, and will remain stable over time.
*/
-auto CreateDataValue(const TrackData& track) -> OwningSlice;
+auto EncodeDataValue(const TrackData& track) -> OwningSlice;
/*
- * Parses bytes previously encoded via CreateDataValue back into a TrackData.
+ * Parses bytes previously encoded via EncodeDataValue back into a TrackData.
* May return nullopt if parsing fails.
*/
auto ParseDataValue(const leveldb::Slice& slice) -> std::optional<TrackData>;
-/* Creates a hash key for the specified hash. */
-auto CreateHashKey(const uint64_t& hash) -> OwningSlice;
+/* Encodes a hash key for the specified hash. */
+auto EncodeHashKey(const uint64_t& hash) -> OwningSlice;
/*
* Encodes a hash value (at this point just a track id) into bytes, in
* preparation for storing within the database. This encoding is consistent, and
* will remain stable over time.
*/
-auto CreateHashValue(TrackId id) -> OwningSlice;
+auto EncodeHashValue(TrackId id) -> OwningSlice;
/*
- * Parses bytes previously encoded via CreateHashValue back into a track id. May
+ * Parses bytes previously encoded via EncodeHashValue back into a track id. May
* return nullopt if parsing fails.
*/
auto ParseHashValue(const leveldb::Slice&) -> std::optional<TrackId>;
+/* Encodes a prefix that matches all index keys, of all ids and depths. */
+auto EncodeAllIndexesPrefix() -> OwningSlice;
+
+/*
+ */
+auto EncodeIndexPrefix(const IndexKey::Header&) -> OwningSlice;
+
+auto EncodeIndexKey(const IndexKey&) -> OwningSlice;
+auto ParseIndexKey(const leveldb::Slice&) -> std::optional<IndexKey>;
+
/* Encodes a TrackId as bytes. */
auto TrackIdToBytes(TrackId id) -> OwningSlice;
diff --git a/src/database/include/track.hpp b/src/database/include/track.hpp
index 5a0c0ca8..e3f94db4 100644
--- a/src/database/include/track.hpp
+++ b/src/database/include/track.hpp
@@ -8,11 +8,14 @@
#include <stdint.h>
+#include <map>
+#include <memory>
#include <optional>
#include <string>
#include <utility>
#include "leveldb/db.h"
+#include "shared_string.h"
#include "span.hpp"
namespace database {
@@ -41,25 +44,33 @@ enum class Encoding {
kFlac = 4,
};
+enum class Tag {
+ kTitle = 0,
+ kArtist = 1,
+ kAlbum = 2,
+ kAlbumTrack = 3,
+ kGenre = 4,
+};
+
/*
* Owning container for tag-related track metadata that was extracted from a
* file.
*/
-struct TrackTags {
- Encoding encoding;
- std::optional<std::string> title;
-
- // TODO(jacqueline): It would be nice to use shared_ptr's for the artist and
- // album, since there's likely a fair number of duplicates for each
- // (especially the former).
+class TrackTags {
+ public:
+ auto encoding() const -> Encoding { return encoding_; };
+ auto encoding(Encoding e) -> void { encoding_ = e; };
- std::optional<std::string> artist;
- std::optional<std::string> album;
+ TrackTags() : encoding_(Encoding::kUnsupported) {}
std::optional<int> channels;
std::optional<int> sample_rate;
std::optional<int> bits_per_sample;
+ auto set(const Tag& key, const std::string& val) -> void;
+ auto at(const Tag& key) const -> std::optional<shared_string>;
+ auto operator[](const Tag& key) const -> std::optional<shared_string>;
+
/*
* Returns a hash of the 'identifying' tags of this track. That is, a hash
* that can be used to determine if one track is likely the same as another,
@@ -69,6 +80,12 @@ struct TrackTags {
auto Hash() const -> uint64_t;
bool operator==(const TrackTags&) const = default;
+ TrackTags& operator=(const TrackTags&) = default;
+ TrackTags(const TrackTags&) = default;
+
+ private:
+ Encoding encoding_;
+ std::map<Tag, shared_string> tags_;
};
/*
@@ -156,6 +173,8 @@ class Track {
auto data() const -> const TrackData& { return data_; }
auto tags() const -> const TrackTags& { return tags_; }
+ auto TitleOrFilename() const -> shared_string;
+
bool operator==(const Track&) const = default;
Track operator=(const Track& other) const { return Track(other); }
diff --git a/src/database/index.cpp b/src/database/index.cpp
new file mode 100644
index 00000000..a828578d
--- /dev/null
+++ b/src/database/index.cpp
@@ -0,0 +1,88 @@
+/*
+ * Copyright 2023 jacqueline <me@jacqueline.id.au>
+ *
+ * SPDX-License-Identifier: GPL-3.0-only
+ */
+
+#include "index.hpp"
+#include <stdint.h>
+#include <variant>
+#include "komihash.h"
+#include "leveldb/write_batch.h"
+#include "records.hpp"
+
+namespace database {
+
+const IndexInfo kAlbumsByArtist{
+ .id = 1,
+ .name = "Albums by Artist",
+ .components = {Tag::kArtist, Tag::kAlbum, Tag::kAlbumTrack},
+};
+
+const IndexInfo kTracksByGenre{
+ .id = 2,
+ .name = "Tracks by Genre",
+ .components = {Tag::kGenre, Tag::kTitle},
+};
+
+const IndexInfo kAllTracks{
+ .id = 3,
+ .name = "All Tracks",
+ .components = {Tag::kTitle},
+};
+
+auto Index(const IndexInfo& info, const Track& t, leveldb::WriteBatch* batch)
+ -> bool {
+ IndexKey key{
+ .header{
+ .id = info.id,
+ .depth = 0,
+ .components_hash = 0,
+ },
+ .item = {},
+ .track = {},
+ };
+
+ for (std::uint8_t i = 0; i < info.components.size(); i++) {
+ // Fill in the text for this depth.
+ auto text = t.tags().at(info.components.at(i));
+ if (text) {
+ key.item = *text;
+ } else {
+ key.item = {};
+ }
+
+ // If this is the last component, then we should also fill in the track id.
+ if (i == info.components.size() - 1) {
+ key.track = t.data().id();
+ } else {
+ key.track = {};
+ }
+
+ auto encoded = EncodeIndexKey(key);
+ batch->Put(encoded.slice, leveldb::Slice{});
+
+ // If there are more components after this, then we need to finish by
+ // narrowing the header with the current title.
+ if (i < info.components.size() - 1) {
+ key.header = ExpandHeader(key.header, key.item);
+ }
+ }
+ return true;
+}
+
+auto ExpandHeader(const IndexKey::Header& header,
+ const std::optional<std::string>& component)
+ -> IndexKey::Header {
+ IndexKey::Header ret{header};
+ ret.depth++;
+ if (component) {
+ ret.components_hash =
+ komihash(component->data(), component->size(), ret.components_hash);
+ } else {
+ ret.components_hash = komihash(NULL, 0, ret.components_hash);
+ }
+ return ret;
+}
+
+} // namespace database
diff --git a/src/database/records.cpp b/src/database/records.cpp
index 49e5db0b..72608eb0 100644
--- a/src/database/records.cpp
+++ b/src/database/records.cpp
@@ -8,20 +8,43 @@
#include <stdint.h>
+#include <iomanip>
#include <sstream>
+#include <string>
#include <vector>
#include "cbor.h"
#include "esp_log.h"
+#include "index.hpp"
+#include "komihash.h"
+#include "shared_string.h"
#include "track.hpp"
+// As LevelDB is a key-value store, each record in the database consists of a
+// key and an optional value.
+//
+// Values, when present, are always cbor-encoded. This is fast, compact, and
+// very easy to evolve over time due to its inclusion of type information.
+//
+// Keys have a more complicated scheme, as for performance we rely heavily on
+// LevelDB's sorted storage format. We must therefore worry about clustering of
+// similar records, and the sortability of our encoding format.
+// Each kind of key consists of a a single-byte prefix, then one or more
+// fields separated by null (0) bytes. Each field may be cbor-encoded, or may
+// use some bespoke encoding; it depends on whether we want to be able to sort
+// by that field.
+// For debugging and discussion purposes, we represent field separators
+// textually as '/', and write each field as its hex encoding. e.g. a data key
+// for the track with id 17 would be written as 'D / 0x11'.
+
namespace database {
static const char* kTag = "RECORDS";
static const char kDataPrefix = 'D';
static const char kHashPrefix = 'H';
+static const char kIndexPrefix = 'I';
static const char kFieldSeparator = '\0';
/*
@@ -39,6 +62,8 @@ static const char kFieldSeparator = '\0';
template <typename T>
auto cbor_encode(uint8_t** out_buf, T fn) -> std::size_t {
// First pass: work out how many bytes we will encode into.
+ // FIXME: With benchmarking to help, we could consider preallocting a small
+ // buffer here to do the whole encoding in one pass.
CborEncoder size_encoder;
cbor_encoder_init(&size_encoder, NULL, 0, 0);
std::invoke(fn, &size_encoder);
@@ -55,19 +80,21 @@ auto cbor_encode(uint8_t** out_buf, T fn) -> std::size_t {
OwningSlice::OwningSlice(std::string d) : data(d), slice(data) {}
-auto CreateDataPrefix() -> OwningSlice {
+/* 'D/' */
+auto EncodeDataPrefix() -> OwningSlice {
char data[2] = {kDataPrefix, kFieldSeparator};
return OwningSlice({data, 2});
}
-auto CreateDataKey(const TrackId& id) -> OwningSlice {
+/* 'D/ 0xACAB' */
+auto EncodeDataKey(const TrackId& id) -> OwningSlice {
std::ostringstream output;
output.put(kDataPrefix).put(kFieldSeparator);
output << TrackIdToBytes(id).data;
return OwningSlice(output.str());
}
-auto CreateDataValue(const TrackData& track) -> OwningSlice {
+auto EncodeDataValue(const TrackData& track) -> OwningSlice {
uint8_t* buf;
std::size_t buf_len = cbor_encode(&buf, [&](CborEncoder* enc) {
CborEncoder array_encoder;
@@ -179,7 +206,8 @@ auto ParseDataValue(const leveldb::Slice& slice) -> std::optional<TrackData> {
return TrackData(id, path, hash, play_count, is_tombstoned);
}
-auto CreateHashKey(const uint64_t& hash) -> OwningSlice {
+/* 'H/ 0xBEEF' */
+auto EncodeHashKey(const uint64_t& hash) -> OwningSlice {
std::ostringstream output;
output.put(kHashPrefix).put(kFieldSeparator);
@@ -197,10 +225,183 @@ auto ParseHashValue(const leveldb::Slice& slice) -> std::optional<TrackId> {
return BytesToTrackId(slice.ToString());
}
-auto CreateHashValue(TrackId id) -> OwningSlice {
+auto EncodeHashValue(TrackId id) -> OwningSlice {
return TrackIdToBytes(id);
}
+/* 'I/' */
+auto EncodeAllIndexesPrefix() -> OwningSlice {
+ char data[2] = {kIndexPrefix, kFieldSeparator};
+ return OwningSlice({data, 2});
+}
+
+auto AppendIndexHeader(const IndexKey::Header& header, std::ostringstream* out)
+ -> void {
+ *out << kIndexPrefix << kFieldSeparator;
+
+ // Construct the header.
+ uint8_t* buf;
+ std::size_t buf_len = cbor_encode(&buf, [&](CborEncoder* enc) {
+ CborEncoder array_encoder;
+ CborError err;
+ err = cbor_encoder_create_array(enc, &array_encoder, 3);
+ if (err != CborNoError && err != CborErrorOutOfMemory) {
+ ESP_LOGE(kTag, "encoding err %u", err);
+ return;
+ }
+ err = cbor_encode_uint(&array_encoder, header.id);
+ if (err != CborNoError && err != CborErrorOutOfMemory) {
+ ESP_LOGE(kTag, "encoding err %u", err);
+ return;
+ }
+ err = cbor_encode_uint(&array_encoder, header.depth);
+ if (err != CborNoError && err != CborErrorOutOfMemory) {
+ ESP_LOGE(kTag, "encoding err %u", err);
+ return;
+ }
+ err = cbor_encode_uint(&array_encoder, header.components_hash);
+ if (err != CborNoError && err != CborErrorOutOfMemory) {
+ ESP_LOGE(kTag, "encoding err %u", err);
+ return;
+ }
+ err = cbor_encoder_close_container(enc, &array_encoder);
+ if (err != CborNoError && err != CborErrorOutOfMemory) {
+ ESP_LOGE(kTag, "encoding err %u", err);
+ return;
+ }
+ });
+ std::string encoded{reinterpret_cast<char*>(buf), buf_len};
+ delete buf;
+ *out << encoded << kFieldSeparator;
+}
+
+auto EncodeIndexPrefix(const IndexKey::Header& header) -> OwningSlice {
+ std::ostringstream out;
+ AppendIndexHeader(header, &out);
+ return OwningSlice(out.str());
+}
+
+/*
+ * 'I/0xa2/0x686921/0xb9'
+ * ^ --- trailer
+ * ^ --- component ("hi!")
+ * ^ -------- header
+ *
+ * The components *must* be encoded in a way that is easy to sort
+ * lexicographically. The header and footer do not have this restriction, so
+ * cbor is fine.
+ *
+ * We store grouping information within the header; which index, filtered
+ * components. We store disambiguation information in the trailer; just a track
+ * id for now, but could reasonably be something like 'release year' as well.
+ */
+auto EncodeIndexKey(const IndexKey& key) -> OwningSlice {
+ std::ostringstream out;
+
+ // Construct the header.
+ AppendIndexHeader(key.header, &out);
+
+ // The component should already be UTF-8 encoded, so just write it.
+ if (key.item) {
+ out << *key.item;
+ }
+
+ // Construct the footer.
+ out << kFieldSeparator;
+ if (key.track) {
+ out << TrackIdToBytes(*key.track).data;
+ }
+ return OwningSlice(out.str());
+}
+
+auto ParseIndexKey(const leveldb::Slice& slice) -> std::optional<IndexKey> {
+ IndexKey result{};
+
+ auto prefix = EncodeAllIndexesPrefix();
+ if (!slice.starts_with(prefix.data)) {
+ return {};
+ }
+
+ std::string key_data = slice.ToString().substr(prefix.data.size());
+ std::size_t header_length = 0;
+ {
+ CborParser parser;
+ CborValue container;
+ CborError err;
+ err = cbor_parser_init(reinterpret_cast<const uint8_t*>(key_data.data()),
+ key_data.size(), 0, &parser, &container);
+ if (err != CborNoError || !cbor_value_is_container(&container)) {
+ return {};
+ }
+
+ CborValue val;
+ err = cbor_value_enter_container(&container, &val);
+ if (err != CborNoError || !cbor_value_is_unsigned_integer(&val)) {
+ return {};
+ }
+
+ uint64_t raw_int;
+ err = cbor_value_get_uint64(&val, &raw_int);
+ if (err != CborNoError) {
+ return {};
+ }
+ result.header.id = raw_int;
+ err = cbor_value_advance(&val);
+ if (err != CborNoError || !cbor_value_is_unsigned_integer(&val)) {
+ return {};
+ }
+
+ err = cbor_value_get_uint64(&val, &raw_int);
+ if (err != CborNoError) {
+ return {};
+ }
+ result.header.depth = raw_int;
+ err = cbor_value_advance(&val);
+ if (err != CborNoError || !cbor_value_is_unsigned_integer(&val)) {
+ return {};
+ }
+
+ err = cbor_value_get_uint64(&val, &raw_int);
+ if (err != CborNoError) {
+ return {};
+ }
+ result.header.components_hash = raw_int;
+ err = cbor_value_advance(&val);
+ if (err != CborNoError || !cbor_value_at_end(&val)) {
+ return {};
+ }
+
+ const uint8_t* next_byte = cbor_value_get_next_byte(&val);
+ header_length =
+ next_byte - reinterpret_cast<const uint8_t*>(key_data.data());
+ }
+
+ if (header_length == 0) {
+ return {};
+ }
+
+ if (header_length >= key_data.size()) {
+ return {};
+ }
+
+ std::istringstream in(key_data.substr(header_length + 1));
+ std::stringbuf buffer{};
+
+ in.get(buffer, kFieldSeparator);
+ if (buffer.str().size() > 0) {
+ result.item = buffer.str();
+ }
+
+ buffer = {};
+ in.get(buffer);
+ if (buffer.str().size() > 1) {
+ std::string raw_id = buffer.str().substr(1);
+ result.track = BytesToTrackId(raw_id);
+ }
+
+ return result;
+}
+
auto TrackIdToBytes(TrackId id) -> OwningSlice {
uint8_t buf[8];
CborEncoder enc;
diff --git a/src/database/tag_parser.cpp b/src/database/tag_parser.cpp
index 83b0a796..49febe27 100644
--- a/src/database/tag_parser.cpp
+++ b/src/database/tag_parser.cpp
@@ -12,6 +12,23 @@
namespace database {
+auto convert_tag(int tag) -> std::optional<Tag> {  // libtags tag id -> our Tag enum
+  switch (tag) {
+    case Ttitle:
+      return Tag::kTitle;
+    case Tartist:
+      return Tag::kArtist;
+    case Talbum:
+      return Tag::kAlbum;
+    case Ttrack:
+      return Tag::kAlbumTrack;
+    case Tgenre:
+      return Tag::kGenre;
+    default:
+      return {};  // unmapped ids yield empty; the tag() callback then skips them
+  }
+}
+
namespace libtags {
struct Aux {
@@ -55,12 +72,9 @@ static void tag(Tagctx* ctx,
int size,
Tagread f) {
Aux* aux = reinterpret_cast<Aux*>(ctx->aux);
- if (t == Ttitle) {
- aux->tags->title = v;
- } else if (t == Tartist) {
- aux->tags->artist = v;
- } else if (t == Talbum) {
- aux->tags->album = v;
+ auto tag = convert_tag(t);
+ if (tag) {
+ aux->tags->set(*tag, v);
}
}
@@ -108,19 +122,19 @@ auto TagParserImpl::ReadAndParseTags(const std::string& path, TrackTags* out)
switch (ctx.format) {
case Fmp3:
- out->encoding = Encoding::kMp3;
+ out->encoding(Encoding::kMp3);
break;
case Fogg:
- out->encoding = Encoding::kOgg;
+ out->encoding(Encoding::kOgg);
break;
case Fflac:
- out->encoding = Encoding::kFlac;
+ out->encoding(Encoding::kFlac);
break;
case Fwav:
- out->encoding = Encoding::kWav;
+ out->encoding(Encoding::kWav);
break;
default:
- out->encoding = Encoding::kUnsupported;
+ out->encoding(Encoding::kUnsupported);
}
if (ctx.channels > 0) {
diff --git a/src/database/track.cpp b/src/database/track.cpp
index 00acc1f6..dc33701d 100644
--- a/src/database/track.cpp
+++ b/src/database/track.cpp
@@ -7,11 +7,28 @@
#include "track.hpp"
#include <komihash.h>
+#include "shared_string.h"
namespace database {
+auto TrackTags::set(const Tag& key, const std::string& val) -> void {  // store one tag value
+  tags_[key] = val;  // inserts, or overwrites any existing value for this tag
+}
+
+auto TrackTags::at(const Tag& key) const -> std::optional<shared_string> {  // value for tag, if set
+  if (auto it = tags_.find(key); it != tags_.end()) {  // one lookup, not contains()+at()
+    return it->second;
+  }
+  return {};  // tag was never set for this track
+}
+
+auto TrackTags::operator[](const Tag& key) const
+    -> std::optional<shared_string> {
+  return at(key);  // read-only alias for at(); const, never inserts a default
+}
+
/* Helper function to update a komihash stream with a std::string. */
-auto HashString(komihash_stream_t* stream, std::string str) -> void {
+auto HashString(komihash_stream_t* stream, const std::string& str) -> void {  // const ref: no per-call copy
  komihash_stream_update(stream, str.c_str(), str.length());
}
@@ -24,9 +41,11 @@ auto TrackTags::Hash() const -> uint64_t {
// tags at all.
komihash_stream_t stream;
komihash_stream_init(&stream, 0);
- HashString(&stream, title.value_or(""));
- HashString(&stream, artist.value_or(""));
- HashString(&stream, album.value_or(""));
+
+ HashString(&stream, at(Tag::kTitle).value_or(""));
+ HashString(&stream, at(Tag::kArtist).value_or(""));
+ HashString(&stream, at(Tag::kAlbum).value_or(""));
+
return komihash_stream_final(&stream);
}
@@ -48,4 +67,16 @@ void swap(Track& first, Track& second) {
second = temp;
}
+auto Track::TitleOrFilename() const -> shared_string {
+  auto title = tags().at(Tag::kTitle);  // prefer the parsed title tag when present
+  if (title) {
+    return *title;
+  }
+  auto start = data().filepath().find_last_of('/');
+  if (start == std::string::npos) {
+    return data().filepath();  // no directory component: whole path is the name
+  }
+  return data().filepath().substr(start + 1);  // +1 skips the '/'; substr(start) kept it
+}
+
} // namespace database