summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorjacqueline <me@jacqueline.id.au>2023-10-30 15:47:38 +1100
committerjacqueline <me@jacqueline.id.au>2023-10-30 15:52:26 +1100
commitb58c08150853b8055093dc116d407ffd543f8ec8 (patch)
tree9b82d130d2c833fc234bca0f12f0fba1b7202c4d /src
parent18d90051c9145ead86d4da701a2bc54f45e4fb66 (diff)
downloadtangara-fw-b58c08150853b8055093dc116d407ffd543f8ec8.tar.gz
add locale-aware colation to db indexes
Diffstat (limited to 'src')
-rw-r--r--src/database/database.cpp18
-rw-r--r--src/database/include/database.hpp8
-rw-r--r--src/database/include/index.hpp3
-rw-r--r--src/database/index.cpp10
-rw-r--r--src/locale/CMakeLists.txt11
-rw-r--r--src/locale/collation.cpp92
-rw-r--r--src/locale/include/collation.hpp49
-rw-r--r--src/locale/include/strxfrm.h35
-rw-r--r--src/locale/priv_include/weight.h160
-rw-r--r--src/locale/strxfrm_l.c641
-rw-r--r--src/system_fsm/CMakeLists.txt2
-rw-r--r--src/system_fsm/booting.cpp2
-rw-r--r--src/system_fsm/include/service_locator.hpp11
-rw-r--r--src/system_fsm/running.cpp4
14 files changed, 1030 insertions, 16 deletions
diff --git a/src/database/database.cpp b/src/database/database.cpp
index e05fa26a..9b978b8c 100644
--- a/src/database/database.cpp
+++ b/src/database/database.cpp
@@ -17,6 +17,7 @@
#include <optional>
#include <sstream>
+#include "collation.hpp"
#include "esp_log.h"
#include "ff.h"
#include "freertos/projdefs.h"
@@ -49,7 +50,7 @@ static SingletonEnv<leveldb::EspEnv> sEnv;
static const char kDbPath[] = "/.tangara-db";
static const char kKeyDbVersion[] = "schema_version";
-static const uint8_t kCurrentDbVersion = 2;
+static const uint8_t kCurrentDbVersion = 3;
static const char kKeyTrackId[] = "next_track_id";
@@ -73,7 +74,9 @@ static auto CreateNewDatabase(leveldb::Options& options) -> leveldb::DB* {
return db;
}
-auto Database::Open(IFileGatherer& gatherer, ITagParser& parser)
+auto Database::Open(IFileGatherer& gatherer,
+ ITagParser& parser,
+ locale::ICollator& collator)
-> cpp::result<Database*, DatabaseError> {
// TODO(jacqueline): Why isn't compare_and_exchange_* available?
if (sIsDbOpen.exchange(true)) {
@@ -127,7 +130,8 @@ auto Database::Open(IFileGatherer& gatherer, ITagParser& parser)
}
ESP_LOGI(kTag, "Database opened successfully");
- return new Database(db, cache.release(), gatherer, parser, worker);
+ return new Database(db, cache.release(), gatherer, parser, collator,
+ worker);
})
.get();
}
@@ -142,12 +146,14 @@ Database::Database(leveldb::DB* db,
leveldb::Cache* cache,
IFileGatherer& file_gatherer,
ITagParser& tag_parser,
+ locale::ICollator& collator,
std::shared_ptr<tasks::Worker> worker)
: db_(db),
cache_(cache),
worker_task_(worker),
file_gatherer_(file_gatherer),
- tag_parser_(tag_parser) {}
+ tag_parser_(tag_parser),
+ collator_(collator) {}
Database::~Database() {
// Delete db_ first so that any outstanding background work finishes before
@@ -539,7 +545,7 @@ auto Database::dbGetHash(const uint64_t& hash) -> std::optional<TrackId> {
auto Database::dbCreateIndexesForTrack(const Track& track) -> void {
for (const IndexInfo& index : GetIndexes()) {
leveldb::WriteBatch writes;
- auto entries = Index(index, track);
+ auto entries = Index(collator_, index, track);
for (const auto& it : entries) {
writes.Put(EncodeIndexKey(it.first),
{it.second.data(), it.second.size()});
@@ -555,7 +561,7 @@ auto Database::dbRemoveIndexes(std::shared_ptr<TrackData> data) -> void {
}
Track track{data, tags};
for (const IndexInfo& index : GetIndexes()) {
- auto entries = Index(index, track);
+ auto entries = Index(collator_, index, track);
for (auto it = entries.rbegin(); it != entries.rend(); it++) {
auto key = EncodeIndexKey(it->first);
auto status = db_->Delete(leveldb::WriteOptions{}, key);
diff --git a/src/database/include/database.hpp b/src/database/include/database.hpp
index cdf69db0..5eb3a8e9 100644
--- a/src/database/include/database.hpp
+++ b/src/database/include/database.hpp
@@ -16,6 +16,7 @@
#include <utility>
#include <vector>
+#include "collation.hpp"
#include "file_gatherer.hpp"
#include "index.hpp"
#include "leveldb/cache.h"
@@ -92,9 +93,10 @@ class Database {
ALREADY_OPEN,
FAILED_TO_OPEN,
};
- static auto Open(IFileGatherer& file_gatherer, ITagParser& tag_parser)
+ static auto Open(IFileGatherer& file_gatherer,
+ ITagParser& tag_parser,
+ locale::ICollator& collator)
-> cpp::result<Database*, DatabaseError>;
- static auto Open() -> cpp::result<Database*, DatabaseError>;
static auto Destroy() -> void;
@@ -136,11 +138,13 @@ class Database {
// Not owned.
IFileGatherer& file_gatherer_;
ITagParser& tag_parser_;
+ locale::ICollator& collator_;
Database(leveldb::DB* db,
leveldb::Cache* cache,
IFileGatherer& file_gatherer,
ITagParser& tag_parser,
+ locale::ICollator& collator,
std::shared_ptr<tasks::Worker> worker);
auto dbMintNewTrackId() -> TrackId;
diff --git a/src/database/include/index.hpp b/src/database/include/index.hpp
index 13de952d..15b21ee8 100644
--- a/src/database/include/index.hpp
+++ b/src/database/include/index.hpp
@@ -13,6 +13,7 @@
#include <variant>
#include <vector>
+#include "collation.hpp"
#include "leveldb/db.h"
#include "leveldb/slice.h"
@@ -60,7 +61,7 @@ struct IndexKey {
std::optional<TrackId> track;
};
-auto Index(const IndexInfo&, const Track&)
+auto Index(locale::ICollator&, const IndexInfo&, const Track&)
-> std::vector<std::pair<IndexKey, std::pmr::string>>;
auto ExpandHeader(const IndexKey::Header&,
diff --git a/src/database/index.cpp b/src/database/index.cpp
index 84ea050a..7d556192 100644
--- a/src/database/index.cpp
+++ b/src/database/index.cpp
@@ -7,8 +7,11 @@
#include "index.hpp"
#include <cstdint>
+#include <sstream>
#include <variant>
+#include "collation.hpp"
+#include "esp_log.h"
#include "komihash.h"
#include "leveldb/write_batch.h"
@@ -59,7 +62,7 @@ static auto missing_component_text(const Track& track, Tag tag)
}
}
-auto Index(const IndexInfo& info, const Track& t)
+auto Index(locale::ICollator& collator, const IndexInfo& info, const Track& t)
-> std::vector<std::pair<IndexKey, std::pmr::string>> {
std::vector<std::pair<IndexKey, std::pmr::string>> out;
IndexKey key{
@@ -72,15 +75,14 @@ auto Index(const IndexInfo& info, const Track& t)
.track = {},
};
- auto& col = std::use_facet<std::collate<char>>(std::locale());
-
for (std::uint8_t i = 0; i < info.components.size(); i++) {
// Fill in the text for this depth.
auto text = t.tags().at(info.components.at(i));
std::pmr::string value;
if (text) {
std::pmr::string orig = *text;
- key.item = col.transform(&orig[0], &orig[0] + orig.size());
+ auto xfrm = collator.Transform({orig.data(), orig.size()});
+ key.item = {xfrm.data(), xfrm.size()};
value = *text;
} else {
key.item = {};
diff --git a/src/locale/CMakeLists.txt b/src/locale/CMakeLists.txt
new file mode 100644
index 00000000..627ca314
--- /dev/null
+++ b/src/locale/CMakeLists.txt
@@ -0,0 +1,11 @@
+# Copyright 2023 jacqueline <me@jacqueline.id.au>
+#
+# SPDX-License-Identifier: GPL-3.0-only
+
+idf_component_register(
+ SRCS "collation.cpp" "strxfrm_l.c"
+ INCLUDE_DIRS "include"
+ PRIV_INCLUDE_DIRS "priv_include"
+ REQUIRES "span" "esp_partition" "spi_flash")
+
+target_compile_options(${COMPONENT_LIB} PRIVATE ${EXTRA_WARNINGS})
diff --git a/src/locale/collation.cpp b/src/locale/collation.cpp
new file mode 100644
index 00000000..f5e8272a
--- /dev/null
+++ b/src/locale/collation.cpp
@@ -0,0 +1,92 @@
+/*
+ * Copyright 2023 jacqueline <me@jacqueline.id.au>
+ *
+ * SPDX-License-Identifier: GPL-3.0-only
+ */
+
+#include "collation.hpp"
+
+#include <stdint.h>
+#include <memory>
+
+#include "esp_flash_spi_init.h"
+#include "esp_log.h"
+#include "esp_partition.h"
+#include "hal/spi_flash_types.h"
+#include "spi_flash_mmap.h"
+#include "strxfrm.h"
+
+namespace locale {
+
+static constexpr char kTag[] = "collate";
+
+static constexpr esp_partition_type_t kLocalePartitionType =
+ static_cast<esp_partition_type_t>(0x40);
+static constexpr esp_partition_subtype_t kLcCollateSubtype =
+ static_cast<esp_partition_subtype_t>(0x0);
+
+auto CreateCollator() -> std::unique_ptr<ICollator> {
+ std::unique_ptr<ICollator> ret{GLibCollator::create()};
+ if (!ret) {
+ ret.reset(new NoopCollator());
+ }
+ return ret;
+}
+
+auto GLibCollator::create() -> GLibCollator* {
+ uint32_t data_pages = spi_flash_mmap_get_free_pages(SPI_FLASH_MMAP_DATA);
+ ESP_LOGI(kTag, "free data pages: %lu (%lu KiB)", data_pages, data_pages * 64);
+
+ const esp_partition_t* partition =
+ esp_partition_find_first(kLocalePartitionType, kLcCollateSubtype, NULL);
+ if (partition == NULL) {
+ ESP_LOGW(kTag, "no LC_COLLATE partition found");
+ }
+
+ ESP_LOGI(kTag, "found LC_COLLATE partition of size %lu", partition->size);
+ if (partition->size > data_pages * 64 * 1024) {
+ ESP_LOGE(kTag, "not enough free pages to map LC_COLLATE partition!");
+ return nullptr;
+ }
+
+ const void* region;
+ esp_partition_mmap_handle_t handle;
+ esp_err_t err = esp_partition_mmap(partition, 0, partition->size,
+ ESP_PARTITION_MMAP_DATA, &region, &handle);
+ if (err != ESP_OK) {
+ ESP_LOGE(kTag, "LC_COLLATE mmap failed");
+ return nullptr;
+ }
+
+ auto data = std::make_unique<locale_data_t>();
+ if (!parse_locale_data(region, partition->size, data.get())) {
+ ESP_LOGE(kTag, "parsing locale data failed");
+ esp_partition_munmap(handle);
+ return nullptr;
+ }
+
+ return new GLibCollator(handle, std::move(data));
+}
+
+GLibCollator::GLibCollator(const esp_partition_mmap_handle_t handle,
+ std::unique_ptr<locale_data_t> locale)
+ : handle_(handle), locale_data_(std::move(locale)) {}
+
+GLibCollator::~GLibCollator() {
+ esp_partition_munmap(handle_);
+}
+
+auto GLibCollator::Transform(const std::string& in) -> std::string {
+ char dest[256];
+ size_t size = glib_strxfrm(dest, in.c_str(), 256, locale_data_.get());
+ if (size >= 256) {
+ char* larger_dest = new char[size + 1]{0};
+ glib_strxfrm(larger_dest, in.c_str(), size, locale_data_.get());
+ std::string out{larger_dest, size};
+ delete[] larger_dest;
+ return out;
+ }
+ return {dest, size};
+}
+
+} // namespace locale
diff --git a/src/locale/include/collation.hpp b/src/locale/include/collation.hpp
new file mode 100644
index 00000000..e8b6fa17
--- /dev/null
+++ b/src/locale/include/collation.hpp
@@ -0,0 +1,49 @@
+/*
+ * Copyright 2023 jacqueline <me@jacqueline.id.au>
+ *
+ * SPDX-License-Identifier: GPL-3.0-only
+ */
+
+#pragma once
+
+#include <cstddef>
+#include <memory>
+#include <string>
+
+#include "esp_partition.h"
+#include "span.hpp"
+
+#include "strxfrm.h"
+
+namespace locale {
+
+class ICollator {
+ public:
+ virtual ~ICollator() {}
+
+ virtual auto Transform(const std::string&) -> std::string = 0;
+};
+
+class NoopCollator : public ICollator {
+ public:
+ auto Transform(const std::string& in) -> std::string override { return in; }
+};
+
+auto CreateCollator() -> std::unique_ptr<ICollator>;
+
+class GLibCollator : public ICollator {
+ public:
+ static auto create() -> GLibCollator*;
+ ~GLibCollator();
+
+ auto Transform(const std::string& in) -> std::string override;
+
+ private:
+ GLibCollator(const esp_partition_mmap_handle_t,
+ std::unique_ptr<locale_data_t>);
+
+ const esp_partition_mmap_handle_t handle_;
+ std::unique_ptr<locale_data_t> locale_data_;
+};
+
+} // namespace locale
diff --git a/src/locale/include/strxfrm.h b/src/locale/include/strxfrm.h
new file mode 100644
index 00000000..a40f7596
--- /dev/null
+++ b/src/locale/include/strxfrm.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright 2023 jacqueline <me@jacqueline.id.au>
+ *
+ * SPDX-License-Identifier: GPL-3.0-only
+ */
+
+#pragma once
+
+#include <stddef.h>
+#include <stdint.h>
+#include <stdbool.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct {
+ uint_fast32_t nrules;
+ unsigned char* rulesets;
+ unsigned char* weights;
+ int32_t* table;
+ unsigned char* extra;
+ int32_t* indirect;
+} locale_data_t;
+
+bool parse_locale_data(const void* raw_data, size_t size, locale_data_t* out);
+
+size_t glib_strxfrm(char* dest,
+ const char* src,
+ size_t n,
+ locale_data_t* locale);
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/src/locale/priv_include/weight.h b/src/locale/priv_include/weight.h
new file mode 100644
index 00000000..34465d90
--- /dev/null
+++ b/src/locale/priv_include/weight.h
@@ -0,0 +1,160 @@
+/* Copyright (C) 1996-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Written by Ulrich Drepper, <drepper@cygnus.com>.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#pragma once
+
+#include <stdint.h>
+#include <stddef.h>
+
+/* This alignment is used for 32-bit integers in locale files, both
+ those that are explicitly int32_t or uint32_t and those that are
+ wchar_t, regardless of the (possibly smaller) alignment required
+ for such integers on a particular host. */
+#define LOCFILE_ALIGN sizeof (int32_t)
+#define LOCFILE_ALIGN_MASK (LOCFILE_ALIGN - 1)
+#define LOCFILE_ALIGN_UP(x) (((x) + LOCFILE_ALIGN - 1) \
+ & ~LOCFILE_ALIGN_MASK)
+#define LOCFILE_ALIGNED_P(x) (((x) & LOCFILE_ALIGN_MASK) == 0)
+
+/* Find index of weight. */
+static inline int32_t __attribute__ ((always_inline))
+findidx (const int32_t *table,
+ const int32_t *indirect,
+ const unsigned char *extra,
+ const unsigned char **cpp, size_t len)
+{
+ int_fast32_t i = table[*(*cpp)++];
+ const unsigned char *cp;
+ const unsigned char *usrc;
+
+ if (i >= 0)
+ /* This is an index into the weight table. Cool. */
+ return i;
+
+ /* Oh well, more than one sequence starting with this byte.
+ Search for the correct one. */
+ cp = &extra[-i];
+ usrc = *cpp;
+ --len;
+ while (1)
+ {
+ size_t nhere;
+
+ /* The first thing is the index. */
+ i = *((const int32_t *) cp);
+ cp += sizeof (int32_t);
+
+ /* Next is the length of the byte sequence. These are always
+ short byte sequences so there is no reason to call any
+ function (even if they are inlined). */
+ nhere = *cp++;
+
+ if (i >= 0)
+ {
+ /* It is a single character. If it matches we found our
+ index. Note that at the end of each list there is an
+ entry of length zero which represents the single byte
+ sequence. The first (and here only) byte was tested
+ already. */
+ size_t cnt;
+
+ /* With GCC 5.3 when compiling with -Os the compiler warns
+ that seq2.back_us, which becomes usrc, might be used
+ uninitialized. This can't be true because we pass a length
+ of -1 for len at the same time which means that this loop
+ never executes. */
+ for (cnt = 0; cnt < nhere && cnt < len; ++cnt)
+ if (cp[cnt] != usrc[cnt])
+ break;
+
+ if (cnt == nhere)
+ {
+ /* Found it. */
+ *cpp += nhere;
+ return i;
+ }
+
+ /* Up to the next entry. */
+ cp += nhere;
+ if (!LOCFILE_ALIGNED_P (1 + nhere))
+ cp += LOCFILE_ALIGN - (1 + nhere) % LOCFILE_ALIGN;
+ }
+ else
+ {
+ /* This is a range of characters. First decide whether the
+ current byte sequence lies in the range. */
+ size_t cnt;
+ size_t offset = 0;
+
+ for (cnt = 0; cnt < nhere && cnt < len; ++cnt)
+ if (cp[cnt] != usrc[cnt])
+ break;
+
+ if (cnt != nhere)
+ {
+ if (cnt == len || cp[cnt] > usrc[cnt])
+ {
+ /* Cannot be in this range. */
+ cp += 2 * nhere;
+ if (!LOCFILE_ALIGNED_P (1 + 2 * nhere))
+ cp += (LOCFILE_ALIGN
+ - (1 + 2 * nhere) % LOCFILE_ALIGN);
+ continue;
+ }
+
+ /* Test against the end of the range. */
+ for (cnt = 0; cnt < nhere; ++cnt)
+ if (cp[nhere + cnt] != usrc[cnt])
+ break;
+
+ if (cnt != nhere && cp[nhere + cnt] < usrc[cnt])
+ {
+ /* Cannot be in this range. */
+ cp += 2 * nhere;
+ if (!LOCFILE_ALIGNED_P (1 + 2 * nhere))
+ cp += (LOCFILE_ALIGN
+ - (1 + 2 * nhere) % LOCFILE_ALIGN);
+ continue;
+ }
+
+ /* This range matches the next characters. Now find
+ the offset in the indirect table. */
+ for (cnt = 0; cp[cnt] == usrc[cnt]; ++cnt);
+
+ do
+ {
+ offset <<= 8;
+ /* With GCC 7 when compiling with -Os the compiler
+ warns that seq1.back_us and seq2.back_us, which
+ become usrc, might be used uninitialized. This
+ is impossible for the same reason as described
+ above. */
+ offset += usrc[cnt] - cp[cnt];
+ }
+ while (++cnt < nhere);
+ }
+
+ *cpp += nhere;
+ return indirect[-i + offset];
+ }
+ }
+
+ /* NOTREACHED */
+ return 0x43219876;
+}
+
diff --git a/src/locale/strxfrm_l.c b/src/locale/strxfrm_l.c
new file mode 100644
index 00000000..198b59b3
--- /dev/null
+++ b/src/locale/strxfrm_l.c
@@ -0,0 +1,641 @@
+/* Copyright (C) 1995-2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Written by Ulrich Drepper <drepper@gnu.org>, 1995.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include "strxfrm.h"
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/param.h>
+
+#include "esp_log.h"
+
+#include "weight.h"
+
+static const char kTag[] = "strxfrm";
+
+#ifndef STRING_TYPE
+#define STRING_TYPE char
+#define USTRING_TYPE unsigned char
+#define STRLEN strlen
+#define STPNCPY stpncpy
+#define L(arg) arg
+#endif
+
+#define CONCAT(a, b) CONCAT1(a, b)
+#define CONCAT1(a, b) a##b
+
+/* Maximum string size that is calculated with cached indices. Right now this
+ is an arbitrary value open to optimizations. SMALL_STR_SIZE * 4 has to be
+ lower than __MAX_ALLOCA_CUTOFF. Keep localedata/xfrm-test.c in sync. */
+#define SMALL_STR_SIZE 4095
+
+/* We know three kinds of collation sorting rules. */
+enum coll_sort_rule {
+ illegal_0__,
+ sort_forward,
+ sort_backward,
+ illegal_3__,
+ sort_position,
+ sort_forward_position,
+ sort_backward_position,
+ sort_mask
+};
+
+enum collate_element {
+ COLLATE_NRULES = 0,
+ COLLATE_RULESETS,
+ COLLATE_TABLEMB,
+ COLLATE_WEIGHTMB,
+ COLLATE_EXTRAMB,
+ COLLATE_INDIRECTMB,
+ COLLATE_GAP1,
+ COLLATE_GAP2,
+ COLLATE_GAP3,
+ COLLATE_TABLEWC,
+ COLLATE_WEIGHTWC,
+ COLLATE_EXTRAWC,
+ COLLATE_INDIRECTWC,
+ COLLATE_SYMB_HASH_SIZEMB,
+ COLLATE_SYMB_TABLEMB,
+ COLLATE_SYMB_EXTRAMB,
+ COLLATE_COLLSEQMB,
+ COLLATE_COLLSEQWC,
+ COLLATE_CODESET,
+
+ // Not a real element; used to know how many elements there are.
+ COLLATE_LAST
+};
+typedef enum collate_element collate_element_t;
+
+bool parse_locale_data(const void* raw_data, size_t size, locale_data_t* out) {
+ const struct {
+ unsigned int magic;
+ unsigned int nstrings;
+ unsigned int strindex[0];
+ }* const header = raw_data;
+
+ if (header->magic != 0x20051017) {
+ ESP_LOGE(kTag, "file magic incorrect (was %x)", header->magic);
+ return false;
+ }
+
+ if (sizeof(*header) + header->nstrings * sizeof(unsigned int) >= size) {
+ ESP_LOGE(kTag, "file was too small to contain header");
+ return false;
+ }
+
+ if (header->nstrings != COLLATE_LAST) {
+ ESP_LOGE(kTag, "file has incorrect number of elements (was %u, wanted %u)",
+ header->nstrings, COLLATE_LAST);
+ return false;
+ }
+
+ // The LC_COLLATE partition appears to contain data in the correct shape.
+ // Pull out pointers to the various attributes it contains.
+ const void* offsets[COLLATE_LAST];
+ for (size_t i = 0; i < header->nstrings; i++) {
+ size_t offset = header->strindex[i];
+ if (offset > size) {
+ ESP_LOGE(kTag, "element offset (%u) exceeds file size", offset);
+ return false;
+ }
+ offsets[i] = (raw_data + offset);
+ }
+
+ // Now parse those pointers into the output struct.
+ out->nrules = *(const unsigned int*)offsets[COLLATE_NRULES];
+
+ out->rulesets = (unsigned char*)offsets[COLLATE_RULESETS];
+ out->table = (int32_t*)offsets[COLLATE_TABLEMB];
+ out->weights = (unsigned char*)offsets[COLLATE_WEIGHTMB];
+ out->extra = (unsigned char*)offsets[COLLATE_EXTRAMB];
+ out->indirect = (int32_t*)offsets[COLLATE_INDIRECTMB];
+
+ assert(((uintptr_t)out->table) % __alignof__(out->table[0]) == 0);
+ assert(((uintptr_t)out->weights) % __alignof__(out->weights[0]) == 0);
+ assert(((uintptr_t)out->extra) % __alignof__(out->extra[0]) == 0);
+ assert(((uintptr_t)out->indirect) % __alignof__(out->indirect[0]) == 0);
+
+ return true;
+}
+
+/* We need UTF-8 encoding of numbers. */
+static int utf8_encode(char* buf, int val) {
+ int retval;
+
+ if (val < 0x80) {
+ *buf++ = (char)val;
+ retval = 1;
+ } else {
+ int step;
+
+ for (step = 2; step < 6; ++step)
+ if ((val & (~(uint32_t)0 << (5 * step + 1))) == 0)
+ break;
+ retval = step;
+
+ *buf = (unsigned char)(~0xff >> step);
+ --step;
+ do {
+ buf[step] = 0x80 | (val & 0x3f);
+ val >>= 6;
+ } while (--step > 0);
+ *buf |= val;
+ }
+
+ return retval;
+}
+
+/* Find next weight and rule index. Inlined since called for every char. */
+static __always_inline size_t find_idx(const USTRING_TYPE** us,
+ int32_t* weight_idx,
+ unsigned char* rule_idx,
+ const locale_data_t* l_data,
+ const int pass) {
+ int32_t tmp = findidx(l_data->table, l_data->indirect, l_data->extra, us, -1);
+ *rule_idx = tmp >> 24;
+ int32_t idx = tmp & 0xffffff;
+ size_t len = l_data->weights[idx++];
+
+ /* Skip over indices of previous levels. */
+ for (int i = 0; i < pass; i++) {
+ idx += len;
+ len = l_data->weights[idx++];
+ }
+
+ *weight_idx = idx;
+ return len;
+}
+
+static int find_position(const USTRING_TYPE* us,
+ const locale_data_t* l_data,
+ const int pass) {
+ int32_t weight_idx;
+ unsigned char rule_idx;
+ const USTRING_TYPE* usrc = us;
+
+ find_idx(&usrc, &weight_idx, &rule_idx, l_data, pass);
+ return l_data->rulesets[rule_idx * l_data->nrules + pass] & sort_position;
+}
+
+/* Do the transformation. */
+static size_t do_xfrm(const USTRING_TYPE* usrc,
+ STRING_TYPE* dest,
+ size_t n,
+ const locale_data_t* l_data) {
+ int32_t weight_idx;
+ unsigned char rule_idx;
+ uint_fast32_t pass;
+ size_t needed = 0;
+ size_t last_needed;
+
+ /* Now the passes over the weights. */
+ for (pass = 0; pass < l_data->nrules; ++pass) {
+ size_t backw_len = 0;
+ last_needed = needed;
+ const USTRING_TYPE* cur = usrc;
+ const USTRING_TYPE* backw_start = NULL;
+
+ /* We assume that if a rule has defined `position' in one section
+ this is true for all of them. */
+ int position = find_position(cur, l_data, pass);
+
+ if (position == 0) {
+ while (*cur != L('\0')) {
+ const USTRING_TYPE* pos = cur;
+ size_t len = find_idx(&cur, &weight_idx, &rule_idx, l_data, pass);
+ int rule = l_data->rulesets[rule_idx * l_data->nrules + pass];
+
+ if ((rule & sort_forward) != 0) {
+ /* Handle the pushed backward sequence. */
+ if (backw_start != NULL) {
+ for (size_t i = backw_len; i > 0;) {
+ int32_t weight_idx;
+ unsigned char rule_idx;
+ size_t len =
+ find_idx(&backw_start, &weight_idx, &rule_idx, l_data, pass);
+ if (needed + i < n)
+ for (size_t j = len; j > 0; j--)
+ dest[needed + i - j] = l_data->weights[weight_idx++];
+
+ i -= len;
+ }
+
+ needed += backw_len;
+ backw_start = NULL;
+ backw_len = 0;
+ }
+
+ /* Now handle the forward element. */
+ if (needed + len < n)
+ while (len-- > 0)
+ dest[needed++] = l_data->weights[weight_idx++];
+ else
+ /* No more characters fit into the buffer. */
+ needed += len;
+ } else {
+ /* Remember start of the backward sequence & track length. */
+ if (backw_start == NULL)
+ backw_start = pos;
+ backw_len += len;
+ }
+ }
+
+ /* Handle the pushed backward sequence. */
+ if (backw_start != NULL) {
+ for (size_t i = backw_len; i > 0;) {
+ size_t len =
+ find_idx(&backw_start, &weight_idx, &rule_idx, l_data, pass);
+ if (needed + i < n)
+ for (size_t j = len; j > 0; j--)
+ dest[needed + i - j] = l_data->weights[weight_idx++];
+
+ i -= len;
+ }
+
+ needed += backw_len;
+ }
+ } else {
+ int val = 1;
+ char buf[7];
+ size_t buflen;
+ size_t i;
+
+ while (*cur != L('\0')) {
+ const USTRING_TYPE* pos = cur;
+ size_t len = find_idx(&cur, &weight_idx, &rule_idx, l_data, pass);
+ int rule = l_data->rulesets[rule_idx * l_data->nrules + pass];
+
+ if ((rule & sort_forward) != 0) {
+ /* Handle the pushed backward sequence. */
+ if (backw_start != NULL) {
+ for (size_t p = backw_len; p > 0; p--) {
+ size_t len;
+ int32_t weight_idx;
+ unsigned char rule_idx;
+ const USTRING_TYPE* backw_cur = backw_start;
+
+ /* To prevent a warning init the used vars. */
+ len = find_idx(&backw_cur, &weight_idx, &rule_idx, l_data, pass);
+
+ for (i = 1; i < p; i++)
+ len =
+ find_idx(&backw_cur, &weight_idx, &rule_idx, l_data, pass);
+
+ if (len != 0) {
+ buflen = utf8_encode(buf, val);
+ if (needed + buflen + len < n) {
+ for (i = 0; i < buflen; ++i)
+ dest[needed + i] = buf[i];
+ for (i = 0; i < len; ++i)
+ dest[needed + buflen + i] = l_data->weights[weight_idx + i];
+ }
+ needed += buflen + len;
+ val = 1;
+ } else
+ ++val;
+ }
+
+ backw_start = NULL;
+ backw_len = 0;
+ }
+
+ /* Now handle the forward element. */
+ if (len != 0) {
+ buflen = utf8_encode(buf, val);
+ if (needed + buflen + len < n) {
+ for (i = 0; i < buflen; ++i)
+ dest[needed + i] = buf[i];
+ for (i = 0; i < len; ++i)
+ dest[needed + buflen + i] = l_data->weights[weight_idx + i];
+ }
+ needed += buflen + len;
+ val = 1;
+ } else
+ ++val;
+ } else {
+ /* Remember start of the backward sequence & track length. */
+ if (backw_start == NULL)
+ backw_start = pos;
+ backw_len++;
+ }
+ }
+
+ /* Handle the pushed backward sequence. */
+ if (backw_start != NULL) {
+ for (size_t p = backw_len; p > 0; p--) {
+ size_t len;
+ int32_t weight_idx;
+ unsigned char rule_idx;
+ const USTRING_TYPE* backw_cur = backw_start;
+
+ /* To prevent a warning init the used vars. */
+ len = find_idx(&backw_cur, &weight_idx, &rule_idx, l_data, pass);
+
+ for (i = 1; i < p; i++)
+ len = find_idx(&backw_cur, &weight_idx, &rule_idx, l_data, pass);
+
+ if (len != 0) {
+ buflen = utf8_encode(buf, val);
+ if (needed + buflen + len < n) {
+ for (i = 0; i < buflen; ++i)
+ dest[needed + i] = buf[i];
+ for (i = 0; i < len; ++i)
+ dest[needed + buflen + i] = l_data->weights[weight_idx + i];
+ }
+ needed += buflen + len;
+ val = 1;
+ } else
+ ++val;
+ }
+ }
+ }
+
+ /* Finally store the byte to separate the passes or terminate
+ the string. */
+ if (needed < n)
+ dest[needed] = pass + 1 < l_data->nrules ? L('\1') : L('\0');
+ ++needed;
+ }
+
+ /* This is a little optimization: many collation specifications have
+ a `position' rule at the end and if no non-ignored character
+ is found the last \1 byte is immediately followed by a \0 byte
+ signalling this. We can avoid the \1 byte(s). */
+ if (needed > 2 && needed == last_needed + 1) {
+ /* Remove the \1 byte. */
+ if (--needed <= n)
+ dest[needed - 1] = L('\0');
+ }
+
+ /* Return the number of bytes/words we need, but don't count the NUL
+ byte/word at the end. */
+ return needed - 1;
+}
+
+/* Do the transformation using weight-index and rule cache. */
+static size_t do_xfrm_cached(STRING_TYPE* dest,
+ size_t n,
+ const locale_data_t* l_data,
+ size_t idxmax,
+ int32_t* idxarr,
+ const unsigned char* rulearr) {
+ uint_fast32_t nrules = l_data->nrules;
+ unsigned char* rulesets = l_data->rulesets;
+ USTRING_TYPE* weights = l_data->weights;
+ uint_fast32_t pass;
+ size_t needed = 0;
+ size_t last_needed;
+ size_t idxcnt;
+
+ /* Now the passes over the weights. */
+ for (pass = 0; pass < nrules; ++pass) {
+ size_t backw_stop = ~0ul;
+ int rule = rulesets[rulearr[0] * nrules + pass];
+ /* We assume that if a rule has defined `position' in one section
+ this is true for all of them. */
+ int position = rule & sort_position;
+
+ last_needed = needed;
+ if (position == 0) {
+ for (idxcnt = 0; idxcnt < idxmax; ++idxcnt) {
+ if ((rule & sort_forward) != 0) {
+ size_t len;
+
+ if (backw_stop != ~0ul) {
+ /* Handle the pushed elements now. */
+ size_t backw;
+
+ for (backw = idxcnt; backw > backw_stop;) {
+ --backw;
+ len = weights[idxarr[backw]++];
+
+ if (needed + len < n)
+ while (len-- > 0)
+ dest[needed++] = weights[idxarr[backw]++];
+ else {
+ /* No more characters fit into the buffer. */
+ needed += len;
+ idxarr[backw] += len;
+ }
+ }
+
+ backw_stop = ~0ul;
+ }
+
+ /* Now handle the forward element. */
+ len = weights[idxarr[idxcnt]++];
+ if (needed + len < n)
+ while (len-- > 0)
+ dest[needed++] = weights[idxarr[idxcnt]++];
+ else {
+ /* No more characters fit into the buffer. */
+ needed += len;
+ idxarr[idxcnt] += len;
+ }
+ } else {
+ /* Remember where the backwards series started. */
+ if (backw_stop == ~0ul)
+ backw_stop = idxcnt;
+ }
+
+ rule = rulesets[rulearr[idxcnt + 1] * nrules + pass];
+ }
+
+ if (backw_stop != ~0ul) {
+ /* Handle the pushed elements now. */
+ size_t backw;
+
+ backw = idxcnt;
+ while (backw > backw_stop) {
+ size_t len = weights[idxarr[--backw]++];
+
+ if (needed + len < n)
+ while (len-- > 0)
+ dest[needed++] = weights[idxarr[backw]++];
+ else {
+ /* No more characters fit into the buffer. */
+ needed += len;
+ idxarr[backw] += len;
+ }
+ }
+ }
+ } else {
+ int val = 1;
+ char buf[7];
+ size_t buflen;
+ size_t i;
+
+ for (idxcnt = 0; idxcnt < idxmax; ++idxcnt) {
+ if ((rule & sort_forward) != 0) {
+ size_t len;
+
+ if (backw_stop != ~0ul) {
+ /* Handle the pushed elements now. */
+ size_t backw;
+
+ for (backw = idxcnt; backw > backw_stop;) {
+ --backw;
+ len = weights[idxarr[backw]++];
+ if (len != 0) {
+ buflen = utf8_encode(buf, val);
+ if (needed + buflen + len < n) {
+ for (i = 0; i < buflen; ++i)
+ dest[needed + i] = buf[i];
+ for (i = 0; i < len; ++i)
+ dest[needed + buflen + i] = weights[idxarr[backw] + i];
+ }
+ needed += buflen + len;
+ idxarr[backw] += len;
+ val = 1;
+ } else
+ ++val;
+ }
+
+ backw_stop = ~0ul;
+ }
+
+ /* Now handle the forward element. */
+ len = weights[idxarr[idxcnt]++];
+ if (len != 0) {
+ buflen = utf8_encode(buf, val);
+ if (needed + buflen + len < n) {
+ for (i = 0; i < buflen; ++i)
+ dest[needed + i] = buf[i];
+ for (i = 0; i < len; ++i)
+ dest[needed + buflen + i] = weights[idxarr[idxcnt] + i];
+ }
+ needed += buflen + len;
+ idxarr[idxcnt] += len;
+ val = 1;
+ } else
+ /* Note that we don't have to increment `idxarr[idxcnt]'
+ since the length is zero. */
+ ++val;
+ } else {
+ /* Remember where the backwards series started. */
+ if (backw_stop == ~0ul)
+ backw_stop = idxcnt;
+ }
+
+ rule = rulesets[rulearr[idxcnt + 1] * nrules + pass];
+ }
+
+ if (backw_stop != ~0ul) {
+ /* Handle the pushed elements now. */
+ size_t backw;
+
+ backw = idxmax - 1;
+ while (backw > backw_stop) {
+ size_t len = weights[idxarr[--backw]++];
+ if (len != 0) {
+ buflen = utf8_encode(buf, val);
+ if (needed + buflen + len < n) {
+ for (i = 0; i < buflen; ++i)
+ dest[needed + i] = buf[i];
+ for (i = 0; i < len; ++i)
+ dest[needed + buflen + i] = weights[idxarr[backw] + i];
+ }
+ needed += buflen + len;
+ idxarr[backw] += len;
+ val = 1;
+ } else
+ ++val;
+ }
+ }
+ }
+
+ /* Finally store the byte to separate the passes or terminate
+ the string. */
+ if (needed < n)
+ dest[needed] = pass + 1 < nrules ? L('\1') : L('\0');
+ ++needed;
+ }
+
+ /* This is a little optimization: many collation specifications have
+ a `position' rule at the end and if no non-ignored character
+ is found the last \1 byte is immediately followed by a \0 byte
+ signalling this. We can avoid the \1 byte(s). */
+ if (needed > 2 && needed == last_needed + 1) {
+ /* Remove the \1 byte. */
+ if (--needed <= n)
+ dest[needed - 1] = L('\0');
+ }
+
+ /* Return the number of bytes/words we need, but don't count the NUL
+ byte/word at the end. */
+ return needed - 1;
+}
+
+size_t glib_strxfrm(char* dest,
+ const char* src,
+ size_t n,
+ locale_data_t* locale) {
+ /* Handle byte comparison case. */
+ if (locale->nrules == 0) {
+ size_t srclen = strlen(src);
+
+ if (n != 0) {
+ strncpy(dest, src, MIN(srclen + 1, n));
+ }
+
+ return srclen;
+ }
+
+ /* Handle an empty string, code hereafter relies on strlen (src) > 0. */
+ if (*src == L('\0')) {
+ if (n != 0)
+ *dest = L('\0');
+ return 0;
+ }
+
+ /* We need the elements of the string as unsigned values since they
+ are used as indeces. */
+ const USTRING_TYPE* usrc = (const USTRING_TYPE*)src;
+
+ /* Allocate cache for small strings on the stack and fill it with weight and
+ rule indices. If the cache size is not sufficient, continue with the
+ uncached xfrm version. */
+ size_t idxmax = 0;
+ const USTRING_TYPE* cur = usrc;
+ int32_t* idxarr = alloca(SMALL_STR_SIZE * sizeof(int32_t));
+ unsigned char* rulearr = alloca(SMALL_STR_SIZE + 1);
+
+ do {
+ int32_t tmp =
+ findidx(locale->table, locale->indirect, locale->extra, &cur, -1);
+ rulearr[idxmax] = tmp >> 24;
+ idxarr[idxmax] = tmp & 0xffffff;
+
+ ++idxmax;
+ } while (*cur != L('\0') && idxmax < SMALL_STR_SIZE);
+
+ /* This element is only read, the value never used but to determine
+ another value which then is ignored. */
+ rulearr[idxmax] = '\0';
+
+ /* Do the transformation. */
+ if (*cur == L('\0'))
+ return do_xfrm_cached(dest, n, locale, idxmax, idxarr, rulearr);
+ else
+ return do_xfrm(usrc, dest, n, locale);
+}
diff --git a/src/system_fsm/CMakeLists.txt b/src/system_fsm/CMakeLists.txt
index 449e14cc..8535a0e7 100644
--- a/src/system_fsm/CMakeLists.txt
+++ b/src/system_fsm/CMakeLists.txt
@@ -5,5 +5,5 @@
idf_component_register(
SRCS "system_fsm.cpp" "running.cpp" "booting.cpp" "idle.cpp" "service_locator.cpp"
INCLUDE_DIRS "include"
- REQUIRES "tinyfsm" "drivers" "database" "ui" "result" "events" "audio" "app_console" "battery")
+ REQUIRES "tinyfsm" "drivers" "database" "ui" "result" "events" "audio" "app_console" "battery" "locale")
target_compile_options(${COMPONENT_LIB} PRIVATE ${EXTRA_WARNINGS})
diff --git a/src/system_fsm/booting.cpp b/src/system_fsm/booting.cpp
index 82d83836..893a4560 100644
--- a/src/system_fsm/booting.cpp
+++ b/src/system_fsm/booting.cpp
@@ -4,6 +4,7 @@
* SPDX-License-Identifier: GPL-3.0-only
*/
+#include "collation.hpp"
#include "system_fsm.hpp"
#include <stdint.h>
@@ -76,6 +77,7 @@ auto Booting::entry() -> void {
sServices->track_queue(std::make_unique<audio::TrackQueue>());
sServices->tag_parser(std::make_unique<database::TagParserImpl>());
+ sServices->collator(locale::CreateCollator());
ESP_LOGI(kTag, "init bluetooth");
sServices->bluetooth(std::make_unique<drivers::Bluetooth>(sServices->nvs()));
diff --git a/src/system_fsm/include/service_locator.hpp b/src/system_fsm/include/service_locator.hpp
index 1dcf0f5e..24dc1eb9 100644
--- a/src/system_fsm/include/service_locator.hpp
+++ b/src/system_fsm/include/service_locator.hpp
@@ -10,6 +10,7 @@
#include "battery.hpp"
#include "bluetooth.hpp"
+#include "collation.hpp"
#include "database.hpp"
#include "gpios.hpp"
#include "nvs.hpp"
@@ -101,6 +102,15 @@ class ServiceLocator {
queue_ = std::move(i);
}
+ auto collator() -> locale::ICollator& {
+ assert(collator_ != nullptr);
+ return *collator_;
+ }
+
+ auto collator(std::unique_ptr<locale::ICollator> i) {
+ collator_ = std::move(i);
+ }
+
// Not copyable or movable.
ServiceLocator(const ServiceLocator&) = delete;
ServiceLocator& operator=(const ServiceLocator&) = delete;
@@ -117,6 +127,7 @@ class ServiceLocator {
std::shared_ptr<database::Database> database_;
std::unique_ptr<database::ITagParser> tag_parser_;
+ std::unique_ptr<locale::ICollator> collator_;
drivers::SdState sd_;
};
diff --git a/src/system_fsm/running.cpp b/src/system_fsm/running.cpp
index 567553a9..91cd46af 100644
--- a/src/system_fsm/running.cpp
+++ b/src/system_fsm/running.cpp
@@ -55,8 +55,8 @@ void Running::entry() {
ESP_LOGI(kTag, "opening database");
sFileGatherer = new database::FileGathererImpl();
- auto database_res =
- database::Database::Open(*sFileGatherer, sServices->tag_parser());
+ auto database_res = database::Database::Open(
+ *sFileGatherer, sServices->tag_parser(), sServices->collator());
if (database_res.has_error()) {
ESP_LOGW(kTag, "failed to open!");
events::System().Dispatch(StorageError{});