From 1858cc04cc8d435cffa80dd838d70b5c167dbe47 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 23 Oct 2025 09:03:44 +0000 Subject: [PATCH 1/7] Initial plan From eb5d7e6953142551028caa69222f2310542b4f09 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 23 Oct 2025 09:24:14 +0000 Subject: [PATCH 2/7] Initial plan for copilot-instructions.md Co-authored-by: ahuber21 <9201869+ahuber21@users.noreply.github.com> --- benchmark/include/svs-benchmark/build.h | 3 +- .../svs-benchmark/inverted/memory/build.h | 3 +- .../svs-benchmark/inverted/memory/search.h | 6 ++- .../svs-benchmark/inverted/memory/test.h | 3 +- benchmark/include/svs-benchmark/ivf/search.h | 3 +- benchmark/include/svs-benchmark/ivf/test.h | 3 +- benchmark/include/svs-benchmark/test.h | 9 +++-- .../include/svs-benchmark/vamana/build.h | 3 +- .../include/svs-benchmark/vamana/iterator.h | 37 ++++++++++++------- .../include/svs-benchmark/vamana/search.h | 3 +- benchmark/include/svs-benchmark/vamana/test.h | 3 +- .../src/inverted/memory/uncompressed.cpp | 6 ++- benchmark/src/ivf/uncompressed.cpp | 18 ++++++--- benchmark/src/vamana/uncompressed.cpp | 21 +++++++---- bindings/python/include/svs/python/common.h | 3 +- bindings/python/include/svs/python/core.h | 3 +- bindings/python/src/ivf.cpp | 6 ++- bindings/python/src/vamana.cpp | 3 +- bindings/python/src/vamana_common.cpp | 9 +++-- include/svs/concepts/data.h | 10 ++--- include/svs/core/allocator.h | 3 +- include/svs/core/data/simple.h | 11 +++--- include/svs/core/distance/cosine.h | 12 ++++-- include/svs/core/distance/distance_core.h | 3 +- include/svs/core/kmeans.h | 3 +- include/svs/core/logging.h | 3 +- include/svs/index/flat/dynamic_flat.h | 7 ++-- include/svs/index/flat/flat.h | 10 +++-- include/svs/index/inverted/memory_based.h | 12 ++++-- .../svs/index/inverted/memory_search_params.h | 3 +- include/svs/index/ivf/clustering.h | 6 ++- 
include/svs/index/ivf/common.h | 6 ++- include/svs/index/ivf/hierarchical_kmeans.h | 10 +++-- include/svs/index/ivf/kmeans.h | 3 +- include/svs/index/vamana/dynamic_index.h | 21 +++++++---- include/svs/index/vamana/greedy_search.h | 3 +- include/svs/index/vamana/index.h | 33 +++++++++++------ include/svs/index/vamana/iterator.h | 13 +++---- include/svs/index/vamana/iterator_schedule.h | 8 ++-- include/svs/index/vamana/multi.h | 18 ++++++--- include/svs/index/vamana/search_params.h | 3 +- include/svs/index/vamana/vamana_build.h | 6 ++- include/svs/lib/algorithms.h | 3 +- include/svs/lib/array.h | 4 +- include/svs/lib/dispatcher.h | 3 +- include/svs/lib/invoke.h | 12 +++--- include/svs/lib/prefetch.h | 3 +- include/svs/lib/saveload.h | 12 ++---- include/svs/lib/saveload/load.h | 7 ++-- include/svs/lib/saveload/save.h | 6 +-- include/svs/lib/threads/types.h | 19 +++++----- include/svs/orchestrators/dynamic_flat.h | 3 +- include/svs/orchestrators/dynamic_vamana.h | 7 ++-- include/svs/orchestrators/inverted.h | 6 ++- include/svs/orchestrators/vamana.h | 7 ++-- include/svs/quantization/scalar/scalar.h | 6 ++- tests/integration/vamana/scalar_iterator.cpp | 3 +- tests/svs/core/allocator.cpp | 7 ++-- tests/svs/core/data/block.cpp | 3 +- tests/svs/core/logging.cpp | 3 +- tests/svs/index/flat/dynamic_flat.cpp | 9 +++-- tests/svs/index/inverted/clustering.cpp | 3 +- tests/svs/index/vamana/dynamic_index.cpp | 6 ++- tests/svs/index/vamana/dynamic_index_2.cpp | 6 +-- tests/svs/index/vamana/index.cpp | 3 +- tests/svs/index/vamana/iterator.cpp | 3 +- tests/svs/index/vamana/iterator_schedule.cpp | 3 +- tests/svs/index/vamana/multi.cpp | 3 +- tests/svs/index/vamana/search_buffer.cpp | 6 ++- tests/svs/lib/array.cpp | 3 +- tests/svs/lib/dispatcher.cpp | 6 ++- tests/svs/lib/meta.cpp | 3 +- tests/svs/lib/saveload.cpp | 3 +- tests/svs/lib/threads/thread.cpp | 3 +- tests/svs/lib/threads/threadpool.cpp | 5 ++- tests/utils/schemas.cpp | 6 ++- tests/utils/test_dataset.cpp | 3 +- 
utils/assemble_vamana.cpp | 3 +- utils/benchmarks/index_build.cpp | 3 +- utils/build_index.cpp | 6 ++- utils/characterization/mutable.cpp | 6 +-- utils/convert_data_to_bfloat16.cpp | 3 +- utils/convert_data_to_float16.cpp | 3 +- utils/search_index.cpp | 3 +- utils/search_index_numa.cpp | 9 +++-- utils/search_ivf.cpp | 6 ++- 86 files changed, 360 insertions(+), 222 deletions(-) diff --git a/benchmark/include/svs-benchmark/build.h b/benchmark/include/svs-benchmark/build.h index 4e08695d..255297f5 100644 --- a/benchmark/include/svs-benchmark/build.h +++ b/benchmark/include/svs-benchmark/build.h @@ -325,7 +325,8 @@ Bundle, T, Q, Distance> initialize_dynamic( .index = init(vectors, indices), .reference = std::move(reference), .queries = std::move(queries), - .build_time = 0}; + .build_time = 0 + }; bundle.build_time = svs::lib::time_difference(tic); return bundle; } diff --git a/benchmark/include/svs-benchmark/inverted/memory/build.h b/benchmark/include/svs-benchmark/inverted/memory/build.h index 7ee47d22..fdd4d445 100644 --- a/benchmark/include/svs-benchmark/inverted/memory/build.h +++ b/benchmark/include/svs-benchmark/inverted/memory/build.h @@ -180,7 +180,8 @@ struct MemoryBuildJob { svs::DistanceType get_distance() const { return distance_; } svs::index::inverted::InvertedBuildParameters get_build_parameters() const { return svs::index::inverted::InvertedBuildParameters{ - clustering_parameters_, primary_build_parameters_}; + clustering_parameters_, primary_build_parameters_ + }; } std::vector get_search_configs() const { diff --git a/benchmark/include/svs-benchmark/inverted/memory/search.h b/benchmark/include/svs-benchmark/inverted/memory/search.h index 4a950933..1d2e9311 100644 --- a/benchmark/include/svs-benchmark/inverted/memory/search.h +++ b/benchmark/include/svs-benchmark/inverted/memory/search.h @@ -118,7 +118,8 @@ struct PiecewiseAssembly { SVS_LOAD_MEMBER_AT_(table, strategy), extract_filename(table, "clustering", root), extract_filename(table, 
"primary_index_config", root), - extract_filename(table, "primary_index_graph", root)}; + extract_filename(table, "primary_index_graph", root) + }; } }; @@ -214,7 +215,8 @@ struct MemorySearchJob { SVS_LOAD_MEMBER_AT_(table, search_targets), extract_filename(table, "original_data", data_root), extract_filename(table, "queries", data_root), - extract_filename(table, "groundtruth", data_root)}; + extract_filename(table, "groundtruth", data_root) + }; } }; diff --git a/benchmark/include/svs-benchmark/inverted/memory/test.h b/benchmark/include/svs-benchmark/inverted/memory/test.h index 0db7c0bf..420bafe9 100644 --- a/benchmark/include/svs-benchmark/inverted/memory/test.h +++ b/benchmark/include/svs-benchmark/inverted/memory/test.h @@ -108,7 +108,8 @@ struct InvertedTest { svsbenchmark::extract_filename(table, "data_f32", root), svsbenchmark::extract_filename(table, "queries_f32", root), SVS_LOAD_MEMBER_AT_(table, queries_in_training_set), - num_threads}; + num_threads + }; } }; diff --git a/benchmark/include/svs-benchmark/ivf/search.h b/benchmark/include/svs-benchmark/ivf/search.h index 7a0c563a..e3072acc 100644 --- a/benchmark/include/svs-benchmark/ivf/search.h +++ b/benchmark/include/svs-benchmark/ivf/search.h @@ -199,7 +199,8 @@ struct SearchJob { SVS_LOAD_MEMBER_AT_(table, ndims), SVS_LOAD_MEMBER_AT_(table, num_threads), SVS_LOAD_MEMBER_AT_(table, search_parameters), - SVS_LOAD_MEMBER_AT_(table, preset_parameters)}; + SVS_LOAD_MEMBER_AT_(table, preset_parameters) + }; } }; diff --git a/benchmark/include/svs-benchmark/ivf/test.h b/benchmark/include/svs-benchmark/ivf/test.h index 943272f7..309189a1 100644 --- a/benchmark/include/svs-benchmark/ivf/test.h +++ b/benchmark/include/svs-benchmark/ivf/test.h @@ -119,7 +119,8 @@ struct IVFTest { svsbenchmark::extract_filename(table, "graph", root), svsbenchmark::extract_filename(table, "queries_f32", root), SVS_LOAD_MEMBER_AT_(table, queries_in_training_set), - num_threads}; + num_threads + }; } }; diff --git 
a/benchmark/include/svs-benchmark/test.h b/benchmark/include/svs-benchmark/test.h index a8190b99..9413c0ea 100644 --- a/benchmark/include/svs-benchmark/test.h +++ b/benchmark/include/svs-benchmark/test.h @@ -69,7 +69,8 @@ struct DistanceAndGroundtruth { ) { return DistanceAndGroundtruth{ SVS_LOAD_MEMBER_AT_(table, distance), - svsbenchmark::extract_filename(table, "path", root)}; + svsbenchmark::extract_filename(table, "path", root) + }; } }; @@ -133,7 +134,8 @@ template struct ConfigAndResultPrototype { SVS_LOAD_MEMBER_AT_(table, num_neighbors), SVS_LOAD_MEMBER_AT_(table, recall_k), SVS_LOAD_MEMBER_AT_(table, num_queries), - SVS_LOAD_MEMBER_AT_(table, recall)}; + SVS_LOAD_MEMBER_AT_(table, recall) + }; } }; @@ -214,7 +216,8 @@ struct ExpectedResultPrototype { SVS_LOAD_MEMBER_AT_(table, dataset, root), SVS_LOAD_MEMBER_AT_(table, distance), std::move(build_parameters), - SVS_LOAD_MEMBER_AT_(table, config_and_recall)}; + SVS_LOAD_MEMBER_AT_(table, config_and_recall) + }; } }; diff --git a/benchmark/include/svs-benchmark/vamana/build.h b/benchmark/include/svs-benchmark/vamana/build.h index 5278f96d..86e2202f 100644 --- a/benchmark/include/svs-benchmark/vamana/build.h +++ b/benchmark/include/svs-benchmark/vamana/build.h @@ -380,7 +380,8 @@ struct BuildJob : public BuildJobBase { load_preset(), SVS_LOAD_MEMBER_AT_(table, search_parameters), load_save_directory(), - BuildJobBase::from_toml(table, root)}; + BuildJobBase::from_toml(table, root) + }; } }; diff --git a/benchmark/include/svs-benchmark/vamana/iterator.h b/benchmark/include/svs-benchmark/vamana/iterator.h index 7cb73cab..b4b887d3 100644 --- a/benchmark/include/svs-benchmark/vamana/iterator.h +++ b/benchmark/include/svs-benchmark/vamana/iterator.h @@ -63,7 +63,8 @@ struct IteratorSearchParameters { {SVS_LIST_SAVE_(batch_sizes), SVS_LIST_SAVE_(target_recalls), SVS_LIST_SAVE_(num_batches), - SVS_LIST_SAVE_(query_subsample)}}; + SVS_LIST_SAVE_(query_subsample)} + }; } static IteratorSearchParameters load(const 
svs::lib::ContextFreeLoadTable& table) { @@ -71,7 +72,8 @@ struct IteratorSearchParameters { SVS_LOAD_MEMBER_AT_(table, batch_sizes), SVS_LOAD_MEMBER_AT_(table, target_recalls), SVS_LOAD_MEMBER_AT_(table, num_batches), - SVS_LOAD_MEMBER_AT_(table, query_subsample)}; + SVS_LOAD_MEMBER_AT_(table, query_subsample) + }; } }; @@ -101,8 +103,10 @@ svsbenchmark::search::QuerySet subsample( return svsbenchmark::search::QuerySet{ svs::data::ConstSimpleDataView{queries.data(), 2 * count, queries.dimensions()}, svs::data::ConstSimpleDataView{ - groundtruth.data(), 2 * count, groundtruth.dimensions()}, - count}; + groundtruth.data(), 2 * count, groundtruth.dimensions() + }, + count + }; } struct IteratorSearch { @@ -131,7 +135,8 @@ struct IteratorSearch { .distance_ = svs::DistanceType::L2, .parameters_ = IteratorSearchParameters::example(), .query_type_ = svs::DataType::float32, - .ndims_ = Extent{svs::Dynamic}}; + .ndims_ = Extent{svs::Dynamic} + }; } // Dispatch invocation. @@ -173,7 +178,8 @@ struct IteratorSearch { .distance_ = SVS_LOAD_MEMBER_AT_(table, distance), .parameters_ = SVS_LOAD_MEMBER_AT_(table, parameters), .query_type_ = SVS_LOAD_MEMBER_AT_(table, query_type), - .ndims_ = SVS_LOAD_MEMBER_AT_(table, ndims)}; + .ndims_ = SVS_LOAD_MEMBER_AT_(table, ndims) + }; } }; @@ -214,7 +220,8 @@ struct YieldedResult { SVS_LIST_SAVE_(yielded), SVS_LIST_SAVE_(total_yielded), SVS_LIST_SAVE_(total_recall), - SVS_LIST_SAVE_(execution_time)}}; + SVS_LIST_SAVE_(execution_time)} + }; } }; @@ -266,7 +273,8 @@ template struct QueryIteratorResult { SVS_LIST_SAVE_(num_batches), SVS_LIST_SAVE_(target_recall), SVS_LIST_SAVE_(report), - SVS_LIST_SAVE_(results)}}; + SVS_LIST_SAVE_(results)} + }; } }; @@ -372,7 +380,8 @@ std::vector> tune_and_search_iterator( .yielded_ = iterator.size(), .total_yielded_ = total_yielded, .total_recall_ = recall, - .execution_time_ = execution_time}; + .execution_time_ = execution_time + }; }; // Now that we have the baseline, obtain iterator based results. 
@@ -435,10 +444,12 @@ toml::table tune_and_search_iterator( // Use a helper lambda to save the results. // This lambda can be reused when generating the final ``toml::table`` to ensure the // layout is the same. - auto serialize_results = [&](const std::vector>& - results_so_far) { - return toml::table{{"job", toml_base}, {"results", svs::lib::save(results_so_far)}}; - }; + auto serialize_results = + [&](const std::vector>& results_so_far) { + return toml::table{ + {"job", toml_base}, {"results", svs::lib::save(results_so_far)} + }; + }; auto do_checkpoint = [&](const std::vector>& results_so_far ) { diff --git a/benchmark/include/svs-benchmark/vamana/search.h b/benchmark/include/svs-benchmark/vamana/search.h index 31bba634..b8efc9c3 100644 --- a/benchmark/include/svs-benchmark/vamana/search.h +++ b/benchmark/include/svs-benchmark/vamana/search.h @@ -210,7 +210,8 @@ struct SearchJob { SVS_LOAD_MEMBER_AT_(table, ndims), SVS_LOAD_MEMBER_AT_(table, num_threads), SVS_LOAD_MEMBER_AT_(table, search_parameters), - SVS_LOAD_MEMBER_AT_(table, preset_parameters)}; + SVS_LOAD_MEMBER_AT_(table, preset_parameters) + }; } }; diff --git a/benchmark/include/svs-benchmark/vamana/test.h b/benchmark/include/svs-benchmark/vamana/test.h index 18dc0614..c9520a44 100644 --- a/benchmark/include/svs-benchmark/vamana/test.h +++ b/benchmark/include/svs-benchmark/vamana/test.h @@ -118,7 +118,8 @@ struct VamanaTest { svsbenchmark::extract_filename(table, "graph", root), svsbenchmark::extract_filename(table, "queries_f32", root), SVS_LOAD_MEMBER_AT_(table, queries_in_training_set), - num_threads}; + num_threads + }; } }; diff --git a/benchmark/src/inverted/memory/uncompressed.cpp b/benchmark/src/inverted/memory/uncompressed.cpp index 85fe09c8..05c3cca5 100644 --- a/benchmark/src/inverted/memory/uncompressed.cpp +++ b/benchmark/src/inverted/memory/uncompressed.cpp @@ -220,7 +220,8 @@ svsbenchmark::TestFunctionReturn test_build(const InvertedTest& job) { index, build_job, search::QuerySet{ - 
std::move(queries), std::move(groundtruth), job.queries_in_training_set_}, + std::move(queries), std::move(groundtruth), job.queries_in_training_set_ + }, svsbenchmark::BuildTime{build_time}, svsbenchmark::Placeholder{} ); @@ -228,7 +229,8 @@ svsbenchmark::TestFunctionReturn test_build(const InvertedTest& job) { return svsbenchmark::TestFunctionReturn{ .key_ = "inverted_test_build", .results_ = - svs::lib::save_to_table(memory::ExpectedResult(std::move(kind), results))}; + svs::lib::save_to_table(memory::ExpectedResult(std::move(kind), results)) + }; } } // namespace diff --git a/benchmark/src/ivf/uncompressed.cpp b/benchmark/src/ivf/uncompressed.cpp index 2ddc7cfd..8b31dcad 100644 --- a/benchmark/src/ivf/uncompressed.cpp +++ b/benchmark/src/ivf/uncompressed.cpp @@ -144,7 +144,8 @@ svsbenchmark::TestFunctionReturn test_search(const IVFTest& job) { Extent(svs::Dynamic), job.num_threads_, test_search_parameters(), - test_search_configs()}; + test_search_configs() + }; // Load the components for the test. auto tic = svs::lib::now(); @@ -164,14 +165,16 @@ svsbenchmark::TestFunctionReturn test_search(const IVFTest& job) { index, search_job, svsbenchmark::search::QuerySet{ - std::move(queries), std::move(groundtruth), job.queries_in_training_set_}, + std::move(queries), std::move(groundtruth), job.queries_in_training_set_ + }, svsbenchmark::LoadTime{load_time}, svsbenchmark::Placeholder{} ); return TestFunctionReturn{ .key_ = "ivf_test_search", - .results_ = svs::lib::save_to_table(ivf::ExpectedResult(std::move(kind), results))}; + .results_ = svs::lib::save_to_table(ivf::ExpectedResult(std::move(kind), results)) + }; } template @@ -201,7 +204,8 @@ svsbenchmark::TestFunctionReturn test_build(const IVFTest& job) { svs::distance_type_v, Extent(svs::Dynamic), build_parameters, - job.num_threads_}; + job.num_threads_ + }; // Load the components for the test. 
auto data = svsbenchmark::convert_data( @@ -223,14 +227,16 @@ svsbenchmark::TestFunctionReturn test_build(const IVFTest& job) { index, build_job, svsbenchmark::search::QuerySet{ - std::move(queries), std::move(groundtruth), job.queries_in_training_set_}, + std::move(queries), std::move(groundtruth), job.queries_in_training_set_ + }, svsbenchmark::BuildTime{build_time}, svsbenchmark::Placeholder{} ); return TestFunctionReturn{ .key_ = "ivf_test_build", - .results_ = svs::lib::save_to_table(ivf::ExpectedResult(std::move(kind), results))}; + .results_ = svs::lib::save_to_table(ivf::ExpectedResult(std::move(kind), results)) + }; } } // namespace diff --git a/benchmark/src/vamana/uncompressed.cpp b/benchmark/src/vamana/uncompressed.cpp index db88efb5..b40e5f92 100644 --- a/benchmark/src/vamana/uncompressed.cpp +++ b/benchmark/src/vamana/uncompressed.cpp @@ -244,7 +244,8 @@ svsbenchmark::TestFunctionReturn test_search(const VamanaTest& job) { Extent(svs::Dynamic), job.num_threads_, test_search_parameters(), - test_search_configs()}; + test_search_configs() + }; // Load the components for the test. 
auto tic = svs::lib::now(); @@ -268,7 +269,8 @@ svsbenchmark::TestFunctionReturn test_search(const VamanaTest& job) { index, search_job, svsbenchmark::search::QuerySet{ - std::move(queries), std::move(groundtruth), job.queries_in_training_set_}, + std::move(queries), std::move(groundtruth), job.queries_in_training_set_ + }, svsbenchmark::LoadTime{load_time}, IndexTraits::test_generation_optimization() ); @@ -276,7 +278,8 @@ svsbenchmark::TestFunctionReturn test_search(const VamanaTest& job) { return TestFunctionReturn{ .key_ = "vamana_test_search", .results_ = - svs::lib::save_to_table(vamana::ExpectedResult(std::move(kind), results))}; + svs::lib::save_to_table(vamana::ExpectedResult(std::move(kind), results)) + }; } template @@ -286,7 +289,8 @@ svsbenchmark::TestFunctionReturn test_build(const VamanaTest& job) { const auto& groundtruth_path = job.groundtruth_for(distance); auto build_parameters = svs::index::vamana::VamanaBuildParameters{ - pick_alpha(distance), 32, 100, 250, 28, true}; + pick_alpha(distance), 32, 100, 250, 28, true + }; auto kind = svsbenchmark::Uncompressed(svs::datatype_v); @@ -306,7 +310,8 @@ svsbenchmark::TestFunctionReturn test_build(const VamanaTest& job) { svs::distance_type_v, Extent(svs::Dynamic), build_parameters, - job.num_threads_}; + job.num_threads_ + }; // Load the components for the test. 
auto tic = svs::lib::now(); @@ -326,7 +331,8 @@ svsbenchmark::TestFunctionReturn test_build(const VamanaTest& job) { index, build_job, svsbenchmark::search::QuerySet{ - std::move(queries), std::move(groundtruth), job.queries_in_training_set_}, + std::move(queries), std::move(groundtruth), job.queries_in_training_set_ + }, svsbenchmark::BuildTime{build_time}, IndexTraits::test_generation_optimization() ); @@ -334,7 +340,8 @@ svsbenchmark::TestFunctionReturn test_build(const VamanaTest& job) { return TestFunctionReturn{ .key_ = "vamana_test_build", .results_ = - svs::lib::save_to_table(vamana::ExpectedResult(std::move(kind), results))}; + svs::lib::save_to_table(vamana::ExpectedResult(std::move(kind), results)) + }; } } // namespace diff --git a/bindings/python/include/svs/python/common.h b/bindings/python/include/svs/python/common.h index 86817f0d..c4aacd47 100644 --- a/bindings/python/include/svs/python/common.h +++ b/bindings/python/include/svs/python/common.h @@ -186,7 +186,8 @@ matrix_view(pybind11::array_t& data) { template pybind11::array_t numpy_vector(size_t s) { return pybind11::array_t{ - {svs::lib::narrow(s)}}; + {svs::lib::narrow(s)} + }; } /// diff --git a/bindings/python/include/svs/python/core.h b/bindings/python/include/svs/python/core.h index 50864281..1127778d 100644 --- a/bindings/python/include/svs/python/core.h +++ b/bindings/python/include/svs/python/core.h @@ -57,7 +57,8 @@ struct AnonymousVectorData { : array_{ array.template unchecked<2>().data(0, 0), svs::lib::narrow(array.shape(0)), - svs::lib::narrow(array.shape(1))} {} + svs::lib::narrow(array.shape(1)) + } {} // Interface. 
svs::DataType type() const { return array_.type(); } diff --git a/bindings/python/src/ivf.cpp b/bindings/python/src/ivf.cpp index 06a651fe..507b989f 100644 --- a/bindings/python/src/ivf.cpp +++ b/bindings/python/src/ivf.cpp @@ -516,7 +516,8 @@ void wrap(py::module& m) { is_hierarchical, training_fraction, hierarchical_level1_clusters, - seed}; + seed + }; }), py::arg("num_centroids") = 1000, py::arg("minibatch_size") = 10'000, @@ -559,7 +560,8 @@ void wrap(py::module& m) { Args: n_probes: The number of nearest clusters to be explored k_reorder: Level of reordering or reranking done when using compressed datasets - )"}; + )" + }; params .def(py::init(), py::arg("n_probes") = 1, py::arg("k_reorder") = 1.0) diff --git a/bindings/python/src/vamana.cpp b/bindings/python/src/vamana.cpp index 6b4c1c75..c31df3fd 100644 --- a/bindings/python/src/vamana.cpp +++ b/bindings/python/src/vamana.cpp @@ -428,7 +428,8 @@ void wrap(py::module& m) { window_size, max_candidate_pool_size, prune_to, - use_full_search_history}; + use_full_search_history + }; }), py::arg("alpha") = svs::FLOAT_PLACEHOLDER, py::arg("graph_max_degree") = svs::VAMANA_GRAPH_MAX_DEGREE_DEFAULT, diff --git a/bindings/python/src/vamana_common.cpp b/bindings/python/src/vamana_common.cpp index f23688ee..ef4812e8 100644 --- a/bindings/python/src/vamana_common.cpp +++ b/bindings/python/src/vamana_common.cpp @@ -86,7 +86,8 @@ See also: :py:class:`svs.VamanaSearchParameters`, that will be used to determine stopping conditions for graph search. search_buffer_capacity (int, read-only): The (expected) number of valid entries that will be available. Must be at least as large as `search_window_size`. -)"}; +)" + }; config.def(py::init<>()) .def( @@ -138,7 +139,8 @@ See also: :py:class:`Vamana.search_parameters`. Setting either ``prefetch_lookahead`` or ``prefetch_step`` to zero disables candidate prefetching during search. 
- )"}; + )" + }; // N.B.: Keep defaults the same as the C++ class params @@ -220,7 +222,8 @@ See also: :py:meth:`Vamana.experimental_calibrate` and train_prefetchers (bool): Flag to train prefetch parameters. use_existing_parameter_values (bool): Should optimization use existing search parameters or should it use defaults instead. -)"}; +)" + }; // N.B.: Keep defaults the same as the C++ class params.def(py::init<>(), "Instantiate with default parameters.") diff --git a/include/svs/concepts/data.h b/include/svs/concepts/data.h index eb989a50..4ae5b805 100644 --- a/include/svs/concepts/data.h +++ b/include/svs/concepts/data.h @@ -186,8 +186,8 @@ void copy(const Input& input, Output& output) { struct GetDatumAccessor { template - SVS_FORCE_INLINE auto operator()(const Data& data, I i) const - -> decltype(data.get_datum(i)) { + SVS_FORCE_INLINE auto + operator()(const Data& data, I i) const -> decltype(data.get_datum(i)) { return data.get_datum(i); } @@ -199,9 +199,9 @@ struct GetDatumAccessor { template concept AccessorFor = requires(Accessor& accessor, const Data& data, size_t i) { - accessor(data, i); - accessor.prefetch(data, i); - }; + accessor(data, i); + accessor.prefetch(data, i); +}; } // namespace data } // namespace svs diff --git a/include/svs/core/allocator.h b/include/svs/core/allocator.h index 1e449a7e..f1f9c460 100644 --- a/include/svs/core/allocator.h +++ b/include/svs/core/allocator.h @@ -98,7 +98,8 @@ static constexpr std::array hugepage_x86_options{ }; #else static constexpr std::array hugepage_x86_options{ - HugepageX86Parameters{1 << 12, 0}}; + HugepageX86Parameters{1 << 12, 0} +}; #endif // __linux__ namespace detail { diff --git a/include/svs/core/data/simple.h b/include/svs/core/data/simple.h index df0a45c3..dbd1a67f 100644 --- a/include/svs/core/data/simple.h +++ b/include/svs/core/data/simple.h @@ -167,7 +167,8 @@ struct Matcher { ) { auto matcher = Matcher{ .eltype = SVS_LOAD_MEMBER_AT(table, eltype), - .dims = SVS_LOAD_MEMBER_AT(table, 
dims)}; + .dims = SVS_LOAD_MEMBER_AT(table, dims) + }; // Perform a sanity check on the arguments. if (type_hint != DataType::undef && type_hint != matcher.eltype) { @@ -481,9 +482,8 @@ class SimpleData { template requires(!is_const) - void compact( - std::span new_to_old, Pool& threadpool, size_t batchsize = 1'000'000 - ) { + void + compact(std::span new_to_old, Pool& threadpool, size_t batchsize = 1'000'000) { // Allocate scratch space. batchsize = std::min(batchsize, size()); auto buffer = data::SimpleData(batchsize, dimensions()); @@ -503,7 +503,8 @@ class SimpleData { if (forced || new_size > capacity()) { auto new_data = array_type{ svs::make_dims(new_size, lib::forward_extent(dimensions())), - get_allocator()}; + get_allocator() + }; // Copy our contents into the new array. // Since the backing array is dense, we can use `memcpy`. diff --git a/include/svs/core/distance/cosine.h b/include/svs/core/distance/cosine.h index 9f492499..6f2d67f6 100644 --- a/include/svs/core/distance/cosine.h +++ b/include/svs/core/distance/cosine.h @@ -237,13 +237,15 @@ template <> struct CosineFloatOp<16> : public svs::simd::ConvertToFloat<16> { static Pair accumulate(Pair accumulator, __m512 a, __m512 b) { return { - _mm512_fmadd_ps(a, b, accumulator.op), _mm512_fmadd_ps(b, b, accumulator.norm)}; + _mm512_fmadd_ps(a, b, accumulator.op), _mm512_fmadd_ps(b, b, accumulator.norm) + }; } static Pair accumulate(mask_t m, Pair accumulator, __m512 a, __m512 b) { return { _mm512_mask3_fmadd_ps(a, b, accumulator.op, m), - _mm512_mask3_fmadd_ps(b, b, accumulator.norm, m)}; + _mm512_mask3_fmadd_ps(b, b, accumulator.norm, m) + }; } static Pair combine(Pair x, Pair y) { @@ -408,13 +410,15 @@ template <> struct CosineFloatOp<8> : public svs::simd::ConvertToFloat<8> { static Pair accumulate(Pair accumulator, __m256 a, __m256 b) { return { - _mm256_fmadd_ps(a, b, accumulator.op), _mm256_fmadd_ps(b, b, accumulator.norm)}; + _mm256_fmadd_ps(a, b, accumulator.op), _mm256_fmadd_ps(b, b, 
accumulator.norm) + }; } static Pair accumulate(mask_t /*m*/, Pair accumulator, __m256 a, __m256 b) { // For AVX2, masking is handled in the load operations return { - _mm256_fmadd_ps(a, b, accumulator.op), _mm256_fmadd_ps(b, b, accumulator.norm)}; + _mm256_fmadd_ps(a, b, accumulator.op), _mm256_fmadd_ps(b, b, accumulator.norm) + }; } static Pair combine(Pair x, Pair y) { diff --git a/include/svs/core/distance/distance_core.h b/include/svs/core/distance/distance_core.h index 4f59f9de..acf4a679 100644 --- a/include/svs/core/distance/distance_core.h +++ b/include/svs/core/distance/distance_core.h @@ -29,7 +29,8 @@ namespace svs::distance { enum class AVX_AVAILABILITY { NONE, AVX2, AVX512 }; constexpr std::array supported_dim_list{ - 64, 96, 100, 128, 160, 200, 512, 768, svs::Dynamic}; + 64, 96, 100, 128, 160, 200, 512, 768, svs::Dynamic +}; template constexpr bool is_dim_supported() { for (auto i : supported_dim_list) { diff --git a/include/svs/core/kmeans.h b/include/svs/core/kmeans.h index f4f80e7f..0efef457 100644 --- a/include/svs/core/kmeans.h +++ b/include/svs/core/kmeans.h @@ -191,7 +191,8 @@ data::SimpleData train_impl( for (size_t batch = 0; batch < num_batches; ++batch) { auto batch_timer = timer.push_back("mini batch"); auto this_batch = threads::UnitRange{ - batch * batchsize, std::min((batch + 1) * batchsize, data.size())}; + batch * batchsize, std::min((batch + 1) * batchsize, data.size()) + }; process_batch( data::make_const_view(data, this_batch), centroids, diff --git a/include/svs/core/logging.h b/include/svs/core/logging.h index bb885b63..01396d52 100644 --- a/include/svs/core/logging.h +++ b/include/svs/core/logging.h @@ -60,7 +60,8 @@ inline constexpr std::array all_levels = { Level::Warn, Level::Error, Level::Critical, - Level::Off}; + Level::Off +}; /// @brief The type of the global logger. 
using logger_ptr = std::shared_ptr<::spdlog::logger>; diff --git a/include/svs/index/flat/dynamic_flat.h b/include/svs/index/flat/dynamic_flat.h index 5a83fe22..b65f3fd1 100644 --- a/include/svs/index/flat/dynamic_flat.h +++ b/include/svs/index/flat/dynamic_flat.h @@ -573,13 +573,14 @@ template class DynamicFlatIndex { threads::parallel_for( threadpool_, threads::DynamicPartition{ - queries.size(), - compute_query_batch_size(search_parameters, queries.size())}, + queries.size(), compute_query_batch_size(search_parameters, queries.size()) + }, [&](const auto& query_indices, uint64_t /*tid*/) { // Broadcast the distance functor so each thread can process all queries // in its current batch. distance::BroadcastDistance distances{ - extensions::distance(data_, distance_), query_indices.size()}; + extensions::distance(data_, distance_), query_indices.size() + }; search_patch( queries, diff --git a/include/svs/index/flat/flat.h b/include/svs/index/flat/flat.h index 187fc744..194e642d 100644 --- a/include/svs/index/flat/flat.h +++ b/include/svs/index/flat/flat.h @@ -388,13 +388,14 @@ class FlatIndex { threads::parallel_for( threadpool_, threads::DynamicPartition{ - queries.size(), - compute_query_batch_size(search_parameters, queries.size())}, + queries.size(), compute_query_batch_size(search_parameters, queries.size()) + }, [&](const auto& query_indices, uint64_t /*tid*/) { // Broadcast the distance functor so each thread can process all queries // in its current batch. 
distance::BroadcastDistance distances{ - extensions::distance(data_, distance_), query_indices.size()}; + extensions::distance(data_, distance_), query_indices.size() + }; search_patch( queries, @@ -579,7 +580,8 @@ template temporary_flat_index(Data& data, Dist distance, ThreadPoolProto threadpool_proto) { return TemporaryFlatIndex{ - data, distance, threads::as_threadpool(std::move(threadpool_proto))}; + data, distance, threads::as_threadpool(std::move(threadpool_proto)) + }; } } // namespace svs::index::flat diff --git a/include/svs/index/inverted/memory_based.h b/include/svs/index/inverted/memory_based.h index 3d3fc24c..d7e430fd 100644 --- a/include/svs/index/inverted/memory_based.h +++ b/include/svs/index/inverted/memory_based.h @@ -70,7 +70,8 @@ template class SparseClusteredDatase const Original& original, const Clustering& clustering, const Alloc& allocator ) : SparseClusteredDataset{ - original, clustering, clustering.packed_leaf_translation(), allocator} {} + original, clustering, clustering.packed_leaf_translation(), allocator + } {} template SparseClusteredDataset( @@ -95,7 +96,8 @@ template class SparseClusteredDatase for (auto neighbor : cluster) { auto global_id = neighbor.id(); these_ids.at(i) = SparseIDs{ - .local = global_to_local_map.at(global_id), .global = global_id}; + .local = global_to_local_map.at(global_id), .global = global_id + }; ++i; } }); @@ -394,7 +396,8 @@ template class InvertedIndex { ///// Search Parameter Setting search_parameters_type get_search_parameters() const { return InvertedSearchParameters{ - index_.get_search_parameters(), refinement_epsilon_}; + index_.get_search_parameters(), refinement_epsilon_ + }; } void set_search_parameters(const search_parameters_type& parameters) { @@ -604,7 +607,8 @@ auto auto_build( strategy(data, clustering, HugepageAllocator()), std::move(centroids), std::move(primary_threadpool), - std::move(logger)}; + std::move(logger) + }; } ///// Auto Assembling. 
diff --git a/include/svs/index/inverted/memory_search_params.h b/include/svs/index/inverted/memory_search_params.h index 0d867fa2..8b6557cc 100644 --- a/include/svs/index/inverted/memory_search_params.h +++ b/include/svs/index/inverted/memory_search_params.h @@ -54,7 +54,8 @@ struct InvertedSearchParameters { static InvertedSearchParameters load(const lib::ContextFreeLoadTable& table) { return InvertedSearchParameters{ SVS_LOAD_MEMBER_AT_(table, primary_parameters), - SVS_LOAD_MEMBER_AT_(table, refinement_epsilon)}; + SVS_LOAD_MEMBER_AT_(table, refinement_epsilon) + }; } constexpr friend bool diff --git a/include/svs/index/ivf/clustering.h b/include/svs/index/ivf/clustering.h index 8f055517..b70989cd 100644 --- a/include/svs/index/ivf/clustering.h +++ b/include/svs/index/ivf/clustering.h @@ -239,7 +239,8 @@ template class Clustering { if (saved_data_type != datatype_v) { auto centroids_orig = lib::load_at>(table, "centroids"); - if constexpr (std::is_same_v || std::is_same_v) { + if constexpr (std::is_same_v || + std::is_same_v) { auto centroids = convert_data(centroids_orig, threadpool); return Clustering{centroids, deserialize_clusters(io)}; } else { @@ -248,7 +249,8 @@ template class Clustering { } return Clustering{ - SVS_LOAD_MEMBER_AT_(table, centroids), deserialize_clusters(io)}; + SVS_LOAD_MEMBER_AT_(table, centroids), deserialize_clusters(io) + }; } }; diff --git a/include/svs/index/ivf/common.h b/include/svs/index/ivf/common.h index 28f12151..4bd9863f 100644 --- a/include/svs/index/ivf/common.h +++ b/include/svs/index/ivf/common.h @@ -160,7 +160,8 @@ struct IVFSearchParameters { static IVFSearchParameters load(const lib::ContextFreeLoadTable& table) { return IVFSearchParameters{ - SVS_LOAD_MEMBER_AT_(table, n_probes), SVS_LOAD_MEMBER_AT_(table, k_reorder)}; + SVS_LOAD_MEMBER_AT_(table, n_probes), SVS_LOAD_MEMBER_AT_(table, k_reorder) + }; } constexpr friend bool @@ -535,7 +536,8 @@ auto kmeans_training( for (size_t batch = 0; batch < num_batches; ++batch) 
{ auto this_batch = threads::UnitRange{ - batch * batchsize, std::min((batch + 1) * batchsize, data.size())}; + batch * batchsize, std::min((batch + 1) * batchsize, data.size()) + }; auto data_batch = data::make_view(data, this_batch); centroid_assignment( data_batch, diff --git a/include/svs/index/ivf/hierarchical_kmeans.h b/include/svs/index/ivf/hierarchical_kmeans.h index 16854086..2afbf5d9 100644 --- a/include/svs/index/ivf/hierarchical_kmeans.h +++ b/include/svs/index/ivf/hierarchical_kmeans.h @@ -139,7 +139,8 @@ auto hierarchical_kmeans_clustering_impl( for (size_t batch = 0; batch < num_batches; ++batch) { auto this_batch = threads::UnitRange{ - batch * batchsize, std::min((batch + 1) * batchsize, data_train.size())}; + batch * batchsize, std::min((batch + 1) * batchsize, data_train.size()) + }; auto data_batch = data::make_view(data_train, this_batch); centroid_assignment( data_batch, @@ -170,7 +171,8 @@ auto hierarchical_kmeans_clustering_impl( auto data_batch = data::SimpleData{batchsize, ndims}; for (size_t batch = 0; batch < num_batches; ++batch) { auto this_batch = threads::UnitRange{ - batch * batchsize, std::min((batch + 1) * batchsize, data.size())}; + batch * batchsize, std::min((batch + 1) * batchsize, data.size()) + }; auto data_batch_view = data::make_view(data, this_batch); auto all_assignments_convert = timer.push_back("level1 all assignments convert"); convert_data(data_batch_view, data_batch, threadpool); @@ -275,8 +277,8 @@ auto hierarchical_kmeans_clustering_impl( maybe_compute_norms(centroids_level2_fp32, threadpool); for (size_t batch = 0; batch < num_batches; ++batch) { auto this_batch = threads::UnitRange{ - batch * batchsize, - std::min((batch + 1) * batchsize, num_assignments_l2_all)}; + batch * batchsize, std::min((batch + 1) * batchsize, num_assignments_l2_all) + }; auto data_batch = data::make_view(data_level2, this_batch); centroid_assignment( data_batch, diff --git a/include/svs/index/ivf/kmeans.h 
b/include/svs/index/ivf/kmeans.h index c29d5c7f..46cdfb6f 100644 --- a/include/svs/index/ivf/kmeans.h +++ b/include/svs/index/ivf/kmeans.h @@ -86,7 +86,8 @@ auto kmeans_clustering_impl( auto data_batch = data::SimpleData{batchsize, ndims}; for (size_t batch = 0; batch < num_batches; ++batch) { auto this_batch = threads::UnitRange{ - batch * batchsize, std::min((batch + 1) * batchsize, data.size())}; + batch * batchsize, std::min((batch + 1) * batchsize, data.size()) + }; auto data_batch_view = data::make_view(data, this_batch); convert_data(data_batch_view, data_batch, threadpool); centroid_assignment( diff --git a/include/svs/index/vamana/dynamic_index.h b/include/svs/index/vamana/dynamic_index.h index 169be199..8fe0fa65 100644 --- a/include/svs/index/vamana/dynamic_index.h +++ b/include/svs/index/vamana/dynamic_index.h @@ -304,7 +304,8 @@ class MutableVamanaIndex { sp.search_buffer_visited_set_ ), extensions::single_search_setup(data_, distance_), - {sp.prefetch_lookahead_, sp.prefetch_step_}}; + {sp.prefetch_lookahead_, sp.prefetch_step_} + }; } scratchspace_type scratchspace() const { return scratchspace(get_search_parameters()); } @@ -512,7 +513,8 @@ class MutableVamanaIndex { search_buffer_type{sp.buffer_config_, distance::comparator(distance_)}; auto prefetch_parameters = GreedySearchPrefetchParameters{ - sp.prefetch_lookahead_, sp.prefetch_step_}; + sp.prefetch_lookahead_, sp.prefetch_step_ + }; // Legalize search buffer for this search. 
if (buffer.target_capacity() < num_neighbors) { @@ -696,7 +698,8 @@ class MutableVamanaIndex { construction_window_size_, max_candidates_, prune_to_, - use_full_search_history_}; + use_full_search_history_ + }; auto sp = get_search_parameters(); auto prefetch_parameters = @@ -709,7 +712,8 @@ class MutableVamanaIndex { threadpool_, prefetch_parameters, logger_, - logging::Level::Trace}; + logging::Level::Trace + }; builder.construct(alpha_, entry_point(), slots, logging::Level::Trace, logger_); // Mark all added entries as valid. for (const auto& i : slots) { @@ -1011,7 +1015,8 @@ class MutableVamanaIndex { get_max_candidates(), prune_to_, get_full_search_history()}, - get_search_parameters()}; + get_search_parameters() + }; return lib::SaveTable( "vamana_dynamic_auxiliary_parameters", @@ -1325,7 +1330,8 @@ struct VamanaStateLoader { if (debug_load_from_static) { return VamanaStateLoader{ lib::load(table), - IDTranslator::Identity(assume_datasize)}; + IDTranslator::Identity(assume_datasize) + }; } return VamanaStateLoader{ @@ -1426,7 +1432,8 @@ auto auto_dynamic_assemble( std::move(distance), std::move(translator), std::move(threadpool), - std::move(logger)}; + std::move(logger) + }; } } // namespace svs::index::vamana diff --git a/include/svs/index/vamana/greedy_search.h b/include/svs/index/vamana/greedy_search.h index f12c0129..b97c01dc 100644 --- a/include/svs/index/vamana/greedy_search.h +++ b/include/svs/index/vamana/greedy_search.h @@ -166,7 +166,8 @@ void greedy_search( auto prefetcher = lib::make_prefetcher( lib::PrefetchParameters{ - prefetch_parameters.lookahead, prefetch_parameters.step}, + prefetch_parameters.lookahead, prefetch_parameters.step + }, num_neighbors, [&](size_t i) { accessor.prefetch(dataset, neighbors[i]); }, [&](size_t i) { diff --git a/include/svs/index/vamana/index.h b/include/svs/index/vamana/index.h index b7c13664..b604d7d1 100644 --- a/include/svs/index/vamana/index.h +++ b/include/svs/index/vamana/index.h @@ -143,13 +143,16 @@ 
struct VamanaIndexParameters { lib::load_at(table, "construction_window_size"), lib::load_at(table, "max_candidates"), prune_to, - use_full_search_history}, + use_full_search_history + }, VamanaSearchParameters{ - SearchBufferConfig{ - lib::load_at(table, "default_search_window_size")}, + SearchBufferConfig{lib::load_at(table, "default_search_window_size") + }, lib::load_at(table, "visited_set"), 4, - 1}}; + 1 + } + }; } static VamanaIndexParameters load(const lib::ContextFreeLoadTable& table) { @@ -406,7 +409,8 @@ class VamanaIndex { entry_point, std::move(distance_function), std::move(threadpool), - logger} { + logger + } { if (graph_.n_nodes() != data_.size()) { throw ANNEXCEPTION("Wrong sizes!"); } @@ -450,7 +454,8 @@ class VamanaIndex { sp.search_buffer_visited_set_ ), extensions::single_search_setup(data_, distance_), - {sp.prefetch_lookahead_, sp.prefetch_step_}}; + {sp.prefetch_lookahead_, sp.prefetch_step_} + }; } /// @brief Return scratch-space resources for external threading with default parameters @@ -569,11 +574,12 @@ class VamanaIndex { auto search_buffer = search_buffer_type{ SearchBufferConfig(search_parameters.buffer_config_), distance::comparator(distance_), - search_parameters.search_buffer_visited_set_}; + search_parameters.search_buffer_visited_set_ + }; auto prefetch_parameters = GreedySearchPrefetchParameters{ - search_parameters.prefetch_lookahead_, - search_parameters.prefetch_step_}; + search_parameters.prefetch_lookahead_, search_parameters.prefetch_step_ + }; // Increase the search window size if the defaults are not suitable for the // requested number of neighbors. @@ -804,7 +810,8 @@ class VamanaIndex { ) const { // Construct and save runtime parameters. 
auto parameters = VamanaIndexParameters{ - entry_point_.front(), build_parameters_, get_search_parameters()}; + entry_point_.front(), build_parameters_, get_search_parameters() + }; // Config lib::save_to_disk(parameters, config_directory); @@ -951,7 +958,8 @@ auto auto_build( lib::narrow(entry_point), std::move(distance), std::move(threadpool), - logger}; + logger + }; } /// @@ -1000,7 +1008,8 @@ auto auto_assemble( I{}, std::move(distance), std::move(threadpool), - std::move(logger)}; + std::move(logger) + }; auto config = lib::load_from_disk(config_path); index.apply(config); return index; diff --git a/include/svs/index/vamana/iterator.h b/include/svs/index/vamana/iterator.h index 1d4ef788..4c39d9db 100644 --- a/include/svs/index/vamana/iterator.h +++ b/include/svs/index/vamana/iterator.h @@ -48,8 +48,8 @@ template struct RestartInitializer { ) const { // Restart the search from scratch if requested. if (hard_restart_) { - vamana::EntryPointInitializer{ - entry_points_}(buffer, computer, graph, builder, tracker); + vamana::EntryPointInitializer{entry_points_ + }(buffer, computer, graph, builder, tracker); return; } @@ -190,7 +190,8 @@ template class BatchIterator { template svs::Neighbor adapt(N internal) const { if constexpr (Index::needs_id_translation) { return Neighbor{ - parent_->translate_internal_id(internal.id()), internal.distance()}; + parent_->translate_internal_id(internal.id()), internal.distance() + }; } else { return internal; } @@ -236,10 +237,8 @@ template class BatchIterator { auto& buffer = scratchspace_.buffer; auto& prefetch = scratchspace_.prefetch_parameters; return VamanaSearchParameters{ - buffer.config(), - buffer.visited_set_enabled(), - prefetch.lookahead, - prefetch.step}; + buffer.config(), buffer.visited_set_enabled(), prefetch.lookahead, prefetch.step + }; } /// @brief Prepares the next batch of neighbors (up to ``batch_size``) from the index. 
diff --git a/include/svs/index/vamana/iterator_schedule.h b/include/svs/index/vamana/iterator_schedule.h index 0a2ff9f4..0036d744 100644 --- a/include/svs/index/vamana/iterator_schedule.h +++ b/include/svs/index/vamana/iterator_schedule.h @@ -179,7 +179,8 @@ class LinearSchedule { lib::narrow_cast(batchsize), enable_filter_after, lib::narrow_cast(batchsize), - uint16_t{0}} {} + uint16_t{0} + } {} /// @brief Update the search buffer scaling parameters. /// @@ -358,9 +359,8 @@ class AbstractIteratorSchedule { /// arguments to the class's constructor. template AbstractIteratorSchedule(std::in_place_type_t SVS_UNUSED(tag), Args&&... args) - : iface_{std::make_unique>( - std::in_place, SVS_FWD(args)... - )} {} + : iface_{std::make_unique>(std::in_place, SVS_FWD(args)...) + } {} /// @brief Replace the wrapped schedule with a new schedule. template void reset(Schedule schedule) { diff --git a/include/svs/index/vamana/multi.h b/include/svs/index/vamana/multi.h index 2df6be9c..eea2559f 100644 --- a/include/svs/index/vamana/multi.h +++ b/include/svs/index/vamana/multi.h @@ -609,7 +609,8 @@ class MultiMutableVamanaIndex { get_max_candidates(), get_prune_to(), get_full_search_history()}, - get_search_parameters()}; + get_search_parameters() + }; return lib::SaveTable( "multi_vamana_dynamic_auxiliary_parameters", @@ -683,18 +684,21 @@ struct MultiVamanaStateLoader { return MultiVamanaStateLoader{ SVS_LOAD_MEMBER_AT_(table, parameters), IDTranslator{}, - std::move(labels)}; + std::move(labels) + }; } case MultiMutableVamanaLoad::FROM_DYNAMIC: return MultiVamanaStateLoader{ SVS_LOAD_MEMBER_AT_(table, parameters), svs::lib::load_at(table, "translation"), - std::vector{}}; + std::vector{} + }; case MultiMutableVamanaLoad::FROM_STATIC: return MultiVamanaStateLoader{ lib::load(table), IDTranslator::Identity(assume_datasize), - std::vector{}}; + std::vector{} + }; default: throw ANNEXCEPTION("Invalid multi vamana load type"); } @@ -757,7 +761,8 @@ auto auto_multi_dynamic_assemble( 
std::move(distance), labels, std::move(threadpool), - std::move(logger)}; + std::move(logger) + }; } case MultiMutableVamanaLoad::FROM_DYNAMIC: case MultiMutableVamanaLoad::FROM_STATIC: { @@ -782,7 +787,8 @@ auto auto_multi_dynamic_assemble( std::move(distance), std::move(translator), std::move(threadpool), - std::move(logger)}; + std::move(logger) + }; } default: throw ANNEXCEPTION("Invalid multi vamana load type"); diff --git a/include/svs/index/vamana/search_params.h b/include/svs/index/vamana/search_params.h index ca01bade..191928d1 100644 --- a/include/svs/index/vamana/search_params.h +++ b/include/svs/index/vamana/search_params.h @@ -105,7 +105,8 @@ struct VamanaSearchParameters { ), SVS_LOAD_MEMBER_AT_(table, search_buffer_visited_set), 4, - 1}; + 1 + }; } static VamanaSearchParameters load(const lib::ContextFreeLoadTable& table) { diff --git a/include/svs/index/vamana/vamana_build.h b/include/svs/index/vamana/vamana_build.h index 77d5cead..05a8ae21 100644 --- a/include/svs/index/vamana/vamana_build.h +++ b/include/svs/index/vamana/vamana_build.h @@ -125,7 +125,8 @@ template class BackedgeBuffer { BackedgeBuffer(size_t num_elements, size_t bucket_size) : BackedgeBuffer(BackedgeBufferParameters{ - bucket_size, lib::div_round_up(num_elements, bucket_size)}) {} + bucket_size, lib::div_round_up(num_elements, bucket_size) + }) {} // Add a point. void add_edge(Idx src, Idx dst) { @@ -540,7 +541,8 @@ class VamanaBuilder { i, distance::compute( general_distance, src_data, general_accessor(data_, i) - )}; + ) + }; }; candidates.clear(); diff --git a/include/svs/lib/algorithms.h b/include/svs/lib/algorithms.h index 07e71646..3fff652b 100644 --- a/include/svs/lib/algorithms.h +++ b/include/svs/lib/algorithms.h @@ -42,7 +42,8 @@ template struct MinMax { /// A static initializer to keep this class a simple aggregate. 
static MinMax init() { return MinMax{ - .min = std::numeric_limits::max(), .max = std::numeric_limits::lowest()}; + .min = std::numeric_limits::max(), .max = std::numeric_limits::lowest() + }; } void update(T x) { diff --git a/include/svs/lib/array.h b/include/svs/lib/array.h index fef4b3f8..eafcd99a 100644 --- a/include/svs/lib/array.h +++ b/include/svs/lib/array.h @@ -347,8 +347,8 @@ template > class D DenseArray(const DenseArray& other) : pointer_{nullptr} , dims_{other.dims_} - , allocator_{ - atraits::select_on_container_copy_construction(other.get_allocator())} { + , allocator_{atraits::select_on_container_copy_construction(other.get_allocator()) + } { size_t sz = other.size(); pointer_ = atraits::allocate(allocator_, sz); assign(other.begin(), other.end()); diff --git a/include/svs/lib/dispatcher.h b/include/svs/lib/dispatcher.h index a158fa89..7253fd73 100644 --- a/include/svs/lib/dispatcher.h +++ b/include/svs/lib/dispatcher.h @@ -449,7 +449,8 @@ auto make_matcher( // Stateless lambda -> function pointer using unary "+". return +[](const std::remove_cvref_t&... args) -> return_type { return std::array{ - dispatch_match(args)...}; + dispatch_match(args)... + }; }; } diff --git a/include/svs/lib/invoke.h b/include/svs/lib/invoke.h index 6c8be664..e17a274c 100644 --- a/include/svs/lib/invoke.h +++ b/include/svs/lib/invoke.h @@ -26,11 +26,11 @@ namespace func_ns { struct dispatcher { template requires requires(Tag&& tag, Args&&... args) { - svs_invoke(SVS_FWD(tag), SVS_FWD(args)...); - } + svs_invoke(SVS_FWD(tag), SVS_FWD(args)...); + } SVS_FORCE_INLINE constexpr auto operator()(Tag&& tag, Args&&... 
args) const - noexcept(noexcept(svs_invoke(SVS_FWD(tag), SVS_FWD(args)...))) - -> decltype(svs_invoke(SVS_FWD(tag), SVS_FWD(args)...)) { + noexcept(noexcept(svs_invoke(SVS_FWD(tag), SVS_FWD(args)...)) + ) -> decltype(svs_invoke(SVS_FWD(tag), SVS_FWD(args)...)) { return svs_invoke(SVS_FWD(tag), SVS_FWD(args)...); } }; @@ -42,8 +42,8 @@ inline constexpr func_ns::dispatcher svs_invoke = {}; template concept svs_invocable = requires(Tag&& tag, Args&&... args) { - svs::svs_invoke(SVS_FWD(tag), SVS_FWD(args)...); - }; + svs::svs_invoke(SVS_FWD(tag), SVS_FWD(args)...); +}; template using svs_invoke_result_t = std::invoke_result_t; diff --git a/include/svs/lib/prefetch.h b/include/svs/lib/prefetch.h index 0fd438ba..db27be26 100644 --- a/include/svs/lib/prefetch.h +++ b/include/svs/lib/prefetch.h @@ -177,7 +177,8 @@ template Prefetcher, std::remove_cvref_t> make_prefetcher(PrefetchParameters parameters, size_t imax, Op&& op, Pred&& pred) { return Prefetcher, std::remove_cvref_t>{ - parameters, imax, SVS_FWD(op), SVS_FWD(pred)}; + parameters, imax, SVS_FWD(op), SVS_FWD(pred) + }; } } // namespace svs::lib diff --git a/include/svs/lib/saveload.h b/include/svs/lib/saveload.h index da7c8fca..c530bc9d 100644 --- a/include/svs/lib/saveload.h +++ b/include/svs/lib/saveload.h @@ -45,10 +45,8 @@ template bool test_self_save_load_context_free(const T& x) { // Expected Transformation: // SVS_LIST_SAVE_(x, args...) -> {"x", svs::lib::save(x_, args...)} -#define SVS_LIST_SAVE_(name, ...) \ - { \ -#name, svs::lib::save(name##_, ##__VA_ARGS__) \ - } +#define SVS_LIST_SAVE_(name, ...) \ + { #name, svs::lib::save(name##_, ##__VA_ARGS__) } // Expected Transformation: // SVS_INSERT_SAVE_(table, x, args...) @@ -66,10 +64,8 @@ template bool test_self_save_load_context_free(const T& x) { // Expected Transformation: // SVS_LIST_SAVE_(x, args...) -> {"x", svs::lib::save(x, args...)} -#define SVS_LIST_SAVE(name, ...) 
\ - { \ -#name, svs::lib::save(name, ##__VA_ARGS__) \ - } +#define SVS_LIST_SAVE(name, ...) \ + { #name, svs::lib::save(name, ##__VA_ARGS__) } // Expected Transformation: // SVS_INSERT_SAVE_(table, x, args...) diff --git a/include/svs/lib/saveload/load.h b/include/svs/lib/saveload/load.h index 767e02af..4673a564 100644 --- a/include/svs/lib/saveload/load.h +++ b/include/svs/lib/saveload/load.h @@ -656,8 +656,8 @@ template struct Loader { /// Only applicable if such a static member is defined with results convertible to /// ``bool``. template - requires detail::HasStaticDirectLoad bool - can_load_direct(const std::filesystem::path& path, const Args&... args) const { + requires detail::HasStaticDirectLoad + bool can_load_direct(const std::filesystem::path& path, const Args&... args) const { return T::can_load_direct(path, args...); } @@ -825,7 +825,8 @@ inline SerializedObject begin_deserialization(const std::filesystem::path& fullp auto version = get_version(table, config_version_key); svs::lib::detail::check_global_version(version, fullpath); return SerializedObject{ - std::move(table), lib::LoadContext{fullpath.parent_path(), version}}; + std::move(table), lib::LoadContext{fullpath.parent_path(), version} + }; } } // namespace detail diff --git a/include/svs/lib/saveload/save.h b/include/svs/lib/saveload/save.h index 60f556e7..f59d6113 100644 --- a/include/svs/lib/saveload/save.h +++ b/include/svs/lib/saveload/save.h @@ -179,9 +179,9 @@ class SaveTable { namespace detail { template concept HasZeroArgSaveTo = requires(const T& x) { - { x.save() } -> std::same_as; - }; -} + { x.save() } -> std::same_as; +}; +} // namespace detail /// /// @brief Proxy object for an object ``x`` of type ``T``. 
diff --git a/include/svs/lib/threads/types.h b/include/svs/lib/threads/types.h index 1081d2cb..7020cb50 100644 --- a/include/svs/lib/threads/types.h +++ b/include/svs/lib/threads/types.h @@ -58,16 +58,15 @@ class ThreadingException : public std::runtime_error { /// template -concept PartitionableIterator = - requires { - // `I` must be a random access iterator. - requires std::random_access_iterator; - - // Furthermore, the difference type must "play nicely" with integers. - requires std::convertible_to, size_t>; - requires std::convertible_to>; - requires std::convertible_to>; - }; +concept PartitionableIterator = requires { + // `I` must be a random access iterator. + requires std::random_access_iterator; + + // Furthermore, the difference type must "play nicely" with integers. + requires std::convertible_to, size_t>; + requires std::convertible_to>; + requires std::convertible_to>; +}; template struct IteratorPair : std::pair { // type aliases diff --git a/include/svs/orchestrators/dynamic_flat.h b/include/svs/orchestrators/dynamic_flat.h index e06efb45..cc41e357 100644 --- a/include/svs/orchestrators/dynamic_flat.h +++ b/include/svs/orchestrators/dynamic_flat.h @@ -327,7 +327,8 @@ template DynamicFlat make_dynamic_flat(Args&&... args) { using Impl = decltype(index::flat::DynamicFlatIndex{std::forward(args)...}); return DynamicFlat{ - std::make_unique>(std::forward(args)...)}; + std::make_unique>(std::forward(args)...) 
+ }; } } // namespace svs diff --git a/include/svs/orchestrators/dynamic_vamana.h b/include/svs/orchestrators/dynamic_vamana.h index 5d27a0d3..ccb68f98 100644 --- a/include/svs/orchestrators/dynamic_vamana.h +++ b/include/svs/orchestrators/dynamic_vamana.h @@ -136,8 +136,8 @@ class DynamicVamana : public manager::IndexManager { explicit DynamicVamana( AssembleTag SVS_UNUSED(tag), QueryTypes SVS_UNUSED(type), Impl impl ) - : base_type{ - std::make_unique>(std::move(impl))} {} + : base_type{std::make_unique>(std::move(impl)) + } {} ///// Vamana Interface void experimental_reset_performance_parameters() { @@ -456,7 +456,8 @@ template DynamicVamana make_dynamic_vamana(Args&&... args) { using Impl = decltype(index::vamana::MutableVamanaIndex{std::forward(args)...}); return DynamicVamana{ - std::make_unique>(std::forward(args)...)}; + std::make_unique>(std::forward(args)...) + }; } } // namespace svs diff --git a/include/svs/orchestrators/inverted.h b/include/svs/orchestrators/inverted.h index 6b6e5047..de90d0ff 100644 --- a/include/svs/orchestrators/inverted.h +++ b/include/svs/orchestrators/inverted.h @@ -136,7 +136,8 @@ class Inverted : public manager::IndexManager { std::move(strategy), std::move(centroid_picker), std::move(clustering_post_op) - )}; + ) + }; } ///// Assembling @@ -166,7 +167,8 @@ class Inverted : public manager::IndexManager { index_config, graph, std::move(threadpool_proto) - )}; + ) + }; } }; diff --git a/include/svs/orchestrators/vamana.h b/include/svs/orchestrators/vamana.h index 6b698c4f..4359aa2f 100644 --- a/include/svs/orchestrators/vamana.h +++ b/include/svs/orchestrators/vamana.h @@ -210,7 +210,8 @@ class VamanaImpl : public manager::ManagerImpl { return VamanaIterator{ impl(), std::span(svs::get(query), query.size(0)), - extra_search_buffer_capacity}; + extra_search_buffer_capacity + }; } ); } @@ -660,8 +661,8 @@ class Vamana : public manager::IndexManager { /// template Vamana make_vamana(Args&&... 
args) { using Impl = decltype(index::vamana::VamanaIndex{std::forward(args)...}); - return Vamana{ - std::make_unique>(std::forward(args)...)}; + return Vamana{std::make_unique>(std::forward(args)... + )}; } /// diff --git a/include/svs/quantization/scalar/scalar.h b/include/svs/quantization/scalar/scalar.h index 7ddf1cb9..7ab56df6 100644 --- a/include/svs/quantization/scalar/scalar.h +++ b/include/svs/quantization/scalar/scalar.h @@ -462,7 +462,8 @@ class SQDataset { auto compressed = compressor(data, threadpool, allocator); return SQDataset{ - std::move(compressed), scale, bias}; + std::move(compressed), scale, bias + }; } /// @brief Compact the dataset @@ -500,7 +501,8 @@ class SQDataset { return SQDataset{ SVS_LOAD_MEMBER_AT_(table, data, allocator), lib::load_at(table, "scale"), - lib::load_at(table, "bias")}; + lib::load_at(table, "bias") + }; } /// @brief Prefetch data in the dataset. diff --git a/tests/integration/vamana/scalar_iterator.cpp b/tests/integration/vamana/scalar_iterator.cpp index 027275a6..ac57ddba 100644 --- a/tests/integration/vamana/scalar_iterator.cpp +++ b/tests/integration/vamana/scalar_iterator.cpp @@ -47,7 +47,8 @@ void check( CATCH_REQUIRE(index.size() > num_neighbors); auto p = svs::index::vamana::VamanaSearchParameters{ - {num_neighbors, num_neighbors}, false, 0, 0}; + {num_neighbors, num_neighbors}, false, 0, 0 + }; auto scratch = index.scratchspace(p); diff --git a/tests/svs/core/allocator.cpp b/tests/svs/core/allocator.cpp index 27191454..1f4c51d1 100644 --- a/tests/svs/core/allocator.cpp +++ b/tests/svs/core/allocator.cpp @@ -202,14 +202,15 @@ CATCH_TEST_CASE("Testing Allocator", "[allocators]") { } CATCH_SECTION("Rebind") { auto alloc = svs::make_allocator_handle(svs::lib::Allocator()); - svs::lib::rebind_allocator_t rebound_alloc{ - alloc}; + svs::lib::rebind_allocator_t rebound_alloc{alloc + }; auto* ptr = rebound_alloc.allocate(num_elements); rebound_alloc.deallocate(ptr, num_elements); CATCH_STATIC_REQUIRE(std::is_same_v); 
svs::lib::rebind_allocator_t rebound_alloc2{ - rebound_alloc}; + rebound_alloc + }; auto* ptr2 = rebound_alloc2.allocate(num_elements); rebound_alloc2.deallocate(ptr2, num_elements); CATCH_STATIC_REQUIRE(std::is_same_v); diff --git a/tests/svs/core/data/block.cpp b/tests/svs/core/data/block.cpp index 4923b30f..bcb88524 100644 --- a/tests/svs/core/data/block.cpp +++ b/tests/svs/core/data/block.cpp @@ -73,7 +73,8 @@ template void test_blocked() { size_t expected_blocksize = 128; auto parameters = svs::data::BlockingParameters{ - .blocksize_bytes = svs::lib::prevpow2(blocksize_bytes)}; + .blocksize_bytes = svs::lib::prevpow2(blocksize_bytes) + }; auto allocator = svs::data::Blocked>(parameters); auto data = svs::data::BlockedData(num_elements, dimensions, allocator); CATCH_REQUIRE(is_blocked(data)); diff --git a/tests/svs/core/logging.cpp b/tests/svs/core/logging.cpp index c4ea3338..c989828c 100644 --- a/tests/svs/core/logging.cpp +++ b/tests/svs/core/logging.cpp @@ -43,7 +43,8 @@ CATCH_TEST_CASE("Logging", "[core][logging]") { CATCH_STATIC_REQUIRE( svs::logging::all_levels == std::array{ - Trace, Debug, Info, Warn, Error, Critical, Off} + Trace, Debug, Info, Warn, Error, Critical, Off + } ); // SVS to spdlog diff --git a/tests/svs/index/flat/dynamic_flat.cpp b/tests/svs/index/flat/dynamic_flat.cpp index f9f99e70..b8cd4775 100644 --- a/tests/svs/index/flat/dynamic_flat.cpp +++ b/tests/svs/index/flat/dynamic_flat.cpp @@ -91,7 +91,8 @@ void do_check( index.search( results.view(), svs::data::ConstSimpleDataView{ - queries.data(), queries.size(), queries.dimensions()}, + queries.data(), queries.size(), queries.dimensions() + }, search_parameters ); double search_time = svs::lib::time_difference(tic); @@ -103,9 +104,9 @@ void do_check( // compute recall double recall = svs::k_recall_at_n(gt, results, NUM_NEIGHBORS, NUM_NEIGHBORS); - std::cout << "[" << message << "] -- {" - << "operation: " << operation_time << ", groundtruth: " << groundtruth_time - << ", search: " << 
search_time << ", recall: " << recall << "}\n"; + std::cout << "[" << message << "] -- {" << "operation: " << operation_time + << ", groundtruth: " << groundtruth_time << ", search: " << search_time + << ", recall: " << recall << "}\n"; } template diff --git a/tests/svs/index/inverted/clustering.cpp b/tests/svs/index/inverted/clustering.cpp index 29844f61..9508eaa8 100644 --- a/tests/svs/index/inverted/clustering.cpp +++ b/tests/svs/index/inverted/clustering.cpp @@ -288,7 +288,8 @@ void test_end_to_end_clustering( }); auto vamana_parameters = svs::index::vamana::VamanaBuildParameters{ - construction_alpha, 64, 200, 1000, 60, true}; + construction_alpha, 64, 200, 1000, 60, true + }; // Build the index once and reuse it multiple times to help speed up tests. for (size_t max_replicas : {2, 8}) { diff --git a/tests/svs/index/vamana/dynamic_index.cpp b/tests/svs/index/vamana/dynamic_index.cpp index 17725887..baf21ad5 100644 --- a/tests/svs/index/vamana/dynamic_index.cpp +++ b/tests/svs/index/vamana/dynamic_index.cpp @@ -40,7 +40,8 @@ namespace { template auto copy_dataset(const T& data) { auto copy = svs::data::SimplePolymorphicData{ - data.size(), data.dimensions()}; + data.size(), data.dimensions() + }; for (size_t i = 0; i < data.size(); ++i) { copy.set_datum(i, data.get_datum(i)); } @@ -116,7 +117,8 @@ CATCH_TEST_CASE("MutableVamanaIndex", "[graph_index]") { entry_point, svs::distance::DistanceL2(), svs::threads::UnitRange(0, base_data.size()), - num_threads}; + num_threads + }; check_equal(base_data, index); index.debug_check_graph_consistency(false); diff --git a/tests/svs/index/vamana/dynamic_index_2.cpp b/tests/svs/index/vamana/dynamic_index_2.cpp index e590ae54..b79859c3 100644 --- a/tests/svs/index/vamana/dynamic_index_2.cpp +++ b/tests/svs/index/vamana/dynamic_index_2.cpp @@ -134,8 +134,7 @@ struct Report { }; std::ostream& operator<<(std::ostream& stream, const Report& report) { - stream << "[" << report.message_ << "] -- {" - << "operation: " << 
report.operation_time_ + stream << "[" << report.message_ << "] -- {" << "operation: " << report.operation_time_ << ", groundtruth: " << report.groundtruth_time_ << ", search: " << report.search_time_ << ", recall: " << report.recall_ << "}"; return stream; @@ -326,7 +325,8 @@ CATCH_TEST_CASE("Testing Graph Index", "[graph_index][dynamic_index]") { } svs::index::vamana::VamanaBuildParameters parameters{ - 1.2, max_degree, 2 * max_degree, 1000, max_degree - 4, true}; + 1.2, max_degree, 2 * max_degree, 1000, max_degree - 4, true + }; auto tic = svs::lib::now(); auto index = svs::index::vamana::MutableVamanaIndex( diff --git a/tests/svs/index/vamana/index.cpp b/tests/svs/index/vamana/index.cpp index 464b1234..bcec23dd 100644 --- a/tests/svs/index/vamana/index.cpp +++ b/tests/svs/index/vamana/index.cpp @@ -120,7 +120,8 @@ CATCH_TEST_CASE("Vamana Index Parameters", "[index][vamana]") { CATCH_SECTION("Current version") { auto p = VamanaIndexParameters{ - 128, {12.4f, 478, 13, 4, 10, false}, {{10, 20}, true, 1, 1}}; + 128, {12.4f, 478, 13, 4, 10, false}, {{10, 20}, true, 1, 1} + }; CATCH_REQUIRE(svs::lib::test_self_save_load_context_free(p)); } } diff --git a/tests/svs/index/vamana/iterator.cpp b/tests/svs/index/vamana/iterator.cpp index 2b8d5e52..4ba19167 100644 --- a/tests/svs/index/vamana/iterator.cpp +++ b/tests/svs/index/vamana/iterator.cpp @@ -78,7 +78,8 @@ void check( CATCH_REQUIRE(index.size() > num_neighbors); auto p = svs::index::vamana::VamanaSearchParameters{ - {num_neighbors, num_neighbors}, false, 0, 0}; + {num_neighbors, num_neighbors}, false, 0, 0 + }; auto scratch = index.scratchspace(p); diff --git a/tests/svs/index/vamana/iterator_schedule.cpp b/tests/svs/index/vamana/iterator_schedule.cpp index 74a0d3b2..2bddd95f 100644 --- a/tests/svs/index/vamana/iterator_schedule.cpp +++ b/tests/svs/index/vamana/iterator_schedule.cpp @@ -189,7 +189,8 @@ CATCH_TEST_CASE("Iterator Schedules", "[vamana][index][iterator][iterator_schedu // Construct using 
`std::in_place_type`. // Also test the move-assignment operator while we're at it. abstract = svs::index::vamana::AbstractIteratorSchedule{ - std::in_place_type, base, size_t{10}}; + std::in_place_type, base, size_t{10} + }; test_default(abstract, 10); diff --git a/tests/svs/index/vamana/multi.cpp b/tests/svs/index/vamana/multi.cpp index af52864f..8989811b 100644 --- a/tests/svs/index/vamana/multi.cpp +++ b/tests/svs/index/vamana/multi.cpp @@ -67,7 +67,8 @@ CATCH_TEMPLATE_TEST_CASE( const auto groundtruth = test_dataset::load_groundtruth(svs::distance_type_v); const svs::index::vamana::VamanaBuildParameters build_parameters{ - alpha, max_degree, 2 * max_degree, 1000, max_degree - 4, true}; + alpha, max_degree, 2 * max_degree, 1000, max_degree - 4, true + }; const auto search_parameters = svs::index::vamana::VamanaSearchParameters(); diff --git a/tests/svs/index/vamana/search_buffer.cpp b/tests/svs/index/vamana/search_buffer.cpp index 8ecb698a..824f96c1 100644 --- a/tests/svs/index/vamana/search_buffer.cpp +++ b/tests/svs/index/vamana/search_buffer.cpp @@ -596,7 +596,8 @@ CATCH_TEST_CASE("Fuzzing", "[core][search_buffer]") { auto run_test = [&](Cmp SVS_UNUSED(cmp)) { auto setup = FuzzSetup{num_trials, dataset_size, 32, 32, seed, allow_invalid}; auto buffer = svs::index::vamana::SearchBuffer{ - svs::index::vamana::SearchBufferConfig{32, 32}}; + svs::index::vamana::SearchBufferConfig{32, 32} + }; fuzz_test(buffer, setup); // Change size; @@ -917,7 +918,8 @@ CATCH_TEST_CASE("Fuzzing Mutable", "[core][search_buffer]") { auto run_test = [&](Cmp SVS_UNUSED(cmp)) { auto setup = FuzzSetup{num_trials, dataset_size, 32, 32, seed, allow_invalid}; auto buffer = svs::index::vamana::MutableBuffer{ - svs::index::vamana::SearchBufferConfig{32, 32}}; + svs::index::vamana::SearchBufferConfig{32, 32} + }; fuzz_test(buffer, setup); // Change size; diff --git a/tests/svs/lib/array.cpp b/tests/svs/lib/array.cpp index 4c4eb06c..3fe504a4 100644 --- a/tests/svs/lib/array.cpp +++ 
b/tests/svs/lib/array.cpp @@ -385,7 +385,8 @@ make_source_array(const Dims& dims, size_t n_elements, size_t id, Bools check_equal( alloc, std::array{ - id, 1, sizeof(T) * n_elements, 0, 0, n_elements, 0, 0, 0, 1, 0, 0, 0, 0} + id, 1, sizeof(T) * n_elements, 0, 0, n_elements, 0, 0, 0, 1, 0, 0, 0, 0 + } ); // Assign the contents based on whether the id is A or B. diff --git a/tests/svs/lib/dispatcher.cpp b/tests/svs/lib/dispatcher.cpp index ea4f8b97..a3f872e0 100644 --- a/tests/svs/lib/dispatcher.cpp +++ b/tests/svs/lib/dispatcher.cpp @@ -290,7 +290,8 @@ CATCH_TEST_CASE("Dispatcher2", "[lib][dispatcher2]") { // Passed by mutable reference - ensure we can mutate our argument // and have this mutation be visible to the caller. - if constexpr (is_mutable_reference_v && is_mutable_reference_v) { + if constexpr (is_mutable_reference_v && + is_mutable_reference_v) { arg.push_back(4); } }; @@ -340,7 +341,8 @@ CATCH_TEST_CASE("Dispatcher2", "[lib][dispatcher2]") { // Passed by mutable reference - ensure we can mutate our argument // and have this mutation be visible to the caller. 
- if constexpr (is_mutable_reference_v && is_mutable_reference_v) { + if constexpr (is_mutable_reference_v && + is_mutable_reference_v) { arg.value_ = 5; } }; diff --git a/tests/svs/lib/meta.cpp b/tests/svs/lib/meta.cpp index c27fdd9c..3e4a75af 100644 --- a/tests/svs/lib/meta.cpp +++ b/tests/svs/lib/meta.cpp @@ -52,7 +52,8 @@ CATCH_TEST_CASE("Meta", "[lib][meta]") { CATCH_STATIC_REQUIRE( values == std::array{ - svs::DataType::float32, svs::DataType::uint8, svs::DataType::int64} + svs::DataType::float32, svs::DataType::uint8, svs::DataType::int64 + } ); } diff --git a/tests/svs/lib/saveload.cpp b/tests/svs/lib/saveload.cpp index eb731e1e..bb2a690f 100644 --- a/tests/svs/lib/saveload.cpp +++ b/tests/svs/lib/saveload.cpp @@ -376,7 +376,8 @@ struct BuiltIn { SVS_LOAD_MEMBER_AT_(table, bool), SVS_LOAD_MEMBER_AT_(table, str), SVS_LOAD_MEMBER_AT_(table, path), - SVS_LOAD_MEMBER_AT_(table, v)}; + SVS_LOAD_MEMBER_AT_(table, v) + }; } static svs::lib::TryLoadResult diff --git a/tests/svs/lib/threads/thread.cpp b/tests/svs/lib/threads/thread.cpp index fb4ff371..bb774f3f 100644 --- a/tests/svs/lib/threads/thread.cpp +++ b/tests/svs/lib/threads/thread.cpp @@ -455,7 +455,8 @@ CATCH_TEST_CASE("Control Block", "[core][threads][thread_control_block]") { CATCH_SECTION("Shutdown or Exception") { auto graceful_states = std::vector{ - svs::threads::ThreadState::Shutdown, svs::threads::ThreadState::Exception}; + svs::threads::ThreadState::Shutdown, svs::threads::ThreadState::Exception + }; for (auto state : graceful_states) { block.set_state(state); diff --git a/tests/svs/lib/threads/threadpool.cpp b/tests/svs/lib/threads/threadpool.cpp index 575d986e..ec589719 100644 --- a/tests/svs/lib/threads/threadpool.cpp +++ b/tests/svs/lib/threads/threadpool.cpp @@ -117,8 +117,9 @@ CATCH_TEST_CASE("Thread Pool", "[core][threads][threadpool]") { [&](const auto& range, uint64_t tid) { std::lock_guard lock{mutex}; seen_threads.push_back(tid); - ranges.push_back(threads::UnitRange{ - 
*(range.begin()), *(range.end())}); + ranges.push_back( + threads::UnitRange{*(range.begin()), *(range.end())} + ); } ); } diff --git a/tests/utils/schemas.cpp b/tests/utils/schemas.cpp index 4434c88c..dfdbf7fa 100644 --- a/tests/utils/schemas.cpp +++ b/tests/utils/schemas.cpp @@ -48,12 +48,14 @@ std::filesystem::path test_vtest_file() { // Expected contents for vtest std::vector> vtest_contents() { return std::vector>{ - {1.0, 2.0, 3.0, 4.0, 5.0}, {6.0, 7.0, 8.0, 9.0, 10.0}}; + {1.0, 2.0, 3.0, 4.0, 5.0}, {6.0, 7.0, 8.0, 9.0, 10.0} + }; } // Expected contents for v1 std::vector> v1_contents() { return std::vector>{ - {101.0, 102.0, 103.0, 104.0, 105.0}, {106.0, 107.0, 108.0, 109.0, 110.0}}; + {101.0, 102.0, 103.0, 104.0, 105.0}, {106.0, 107.0, 108.0, 109.0, 110.0} + }; } } // namespace test_schemas diff --git a/tests/utils/test_dataset.cpp b/tests/utils/test_dataset.cpp index d1ff3aba..d25dbb15 100644 --- a/tests/utils/test_dataset.cpp +++ b/tests/utils/test_dataset.cpp @@ -109,7 +109,8 @@ svs::graphs::SimpleBlockedGraph graph_blocked() { std::vector expected_out_neighbors() { return std::vector{ - 64, 103, 118, 45, 34, 31, 64, 121, 128, 128, 128, 128, 46, 71, 115, 112}; + 64, 103, 118, 45, 34, 31, 64, 121, 128, 128, 128, 128, 46, 71, 115, 112 + }; } // Helper to load the ground-truth for a given file. 
diff --git a/utils/assemble_vamana.cpp b/utils/assemble_vamana.cpp index 9190cf4d..48c1798a 100644 --- a/utils/assemble_vamana.cpp +++ b/utils/assemble_vamana.cpp @@ -60,7 +60,8 @@ void convert( std::move(data), svs::lib::narrow(entry_point), distance, - 1}; + 1 + }; index.set_alpha(alpha); index.set_construction_window_size(construction_window_size); diff --git a/utils/benchmarks/index_build.cpp b/utils/benchmarks/index_build.cpp index a7a0af06..3a572e43 100644 --- a/utils/benchmarks/index_build.cpp +++ b/utils/benchmarks/index_build.cpp @@ -83,7 +83,8 @@ std::vector benchmark( build_setup.construction_window_size, 1000, build_setup.max_degree, - true}; + true + }; auto build_time = timer.push_back("index build"); auto index = svs::index::vamana::auto_build( diff --git a/utils/build_index.cpp b/utils/build_index.cpp index 4bc7af4b..05c044b1 100644 --- a/utils/build_index.cpp +++ b/utils/build_index.cpp @@ -59,7 +59,8 @@ void build_index( build_search_window_size, max_candidate_pool_size, max_degree, - true}; + true + }; auto index = svs::Vamana::build( parameters, svs::VectorDataLoader(vecs_filename), dist_type, n_threads @@ -142,7 +143,8 @@ int svs_main(std::vector args) { {"int8", build_index}, {"uint8", build_index}, {"float", build_index}, - {"float16", build_index}}; + {"float16", build_index} + }; auto it = dispatcher.find(data_type); if (it == dispatcher.end()) { diff --git a/utils/characterization/mutable.cpp b/utils/characterization/mutable.cpp index 7dd1fdab..e26f71fb 100644 --- a/utils/characterization/mutable.cpp +++ b/utils/characterization/mutable.cpp @@ -122,8 +122,7 @@ struct Report { }; std::ostream& operator<<(std::ostream& stream, const Report& report) { - stream << "[" << report.message_ << "] -- {" - << "operation: " << report.operation_time_ + stream << "[" << report.message_ << "] -- {" << "operation: " << report.operation_time_ << ", groundtruth: " << report.groundtruth_time_ << ", search: " << report.search_time_ << ", recall: " << 
report.recall_ << "}"; return stream; @@ -279,7 +278,8 @@ int svs_main(std::vector args) { } svs::index::vamana::VamanaBuildParameters parameters{ - ALPHA, max_degree, 2 * max_degree, 1000, max_degree, true}; + ALPHA, max_degree, 2 * max_degree, 1000, max_degree, true + }; auto tic = svs::lib::now(); auto index = svs::index::vamana::MutableVamanaIndex( diff --git a/utils/convert_data_to_bfloat16.cpp b/utils/convert_data_to_bfloat16.cpp index 34e1d5ac..d042880c 100644 --- a/utils/convert_data_to_bfloat16.cpp +++ b/utils/convert_data_to_bfloat16.cpp @@ -55,7 +55,8 @@ int svs_main(std::vector args) { std::cout << "Converting Bin data!" << std::endl; auto reader = svs::io::binary::BinaryReader{filename_f32}; auto writer = svs::io::binary::BinaryWriter{ - filename_bf16, reader.nvectors(), reader.ndims()}; + filename_bf16, reader.nvectors(), reader.ndims() + }; for (auto i : reader) { writer << i; } diff --git a/utils/convert_data_to_float16.cpp b/utils/convert_data_to_float16.cpp index 96483203..062c0106 100644 --- a/utils/convert_data_to_float16.cpp +++ b/utils/convert_data_to_float16.cpp @@ -54,7 +54,8 @@ int svs_main(std::vector args) { std::cout << "Converting Bin data!" 
<< std::endl; auto reader = svs::io::binary::BinaryReader{filename_f32}; auto writer = svs::io::binary::BinaryWriter{ - filename_f16, reader.nvectors(), reader.ndims()}; + filename_f16, reader.nvectors(), reader.ndims() + }; for (auto i : reader) { writer << i; } diff --git a/utils/search_index.cpp b/utils/search_index.cpp index 08bc7314..ae9cd4c9 100644 --- a/utils/search_index.cpp +++ b/utils/search_index.cpp @@ -159,7 +159,8 @@ int svs_main(std::vector&& args) { {{"float", "int8"}, search_index}, {{"float", "uint8"}, search_index}, {{"float", "float"}, search_index}, - {{"float", "float16"}, search_index}}; + {{"float", "float16"}, search_index} + }; auto it = dispatcher.find({query_data_type, db_data_type}); if (it == dispatcher.end()) { diff --git a/utils/search_index_numa.cpp b/utils/search_index_numa.cpp index 46653f1c..3103216e 100644 --- a/utils/search_index_numa.cpp +++ b/utils/search_index_numa.cpp @@ -188,7 +188,8 @@ int svs_main(std::vector args) { {{"float", "int8"}, search_index_numa}, {{"float", "uint8"}, search_index_numa}, {{"float", "float"}, search_index_numa}, - {{"float", "float16"}, search_index_numa}}; + {{"float", "float16"}, search_index_numa} + }; auto it = dispatcher.find({query_data_type, db_data_type}); if (it == dispatcher.end()) { @@ -205,10 +206,12 @@ int svs_main(std::vector args) { std::vector index_filenames{index_filename, index_filename}; std::vector graph_memory_styles{ - graph_memory_style_0, graph_memory_style_1}; + graph_memory_style_0, graph_memory_style_1 + }; std::vector graph_filenames{graph_filename_0, graph_filename_1}; std::vector data_memory_styles{ - data_memory_style, data_memory_style}; + data_memory_style, data_memory_style + }; std::vector data_filenames{data_filename, data_filename}; f(query_filename, diff --git a/utils/search_ivf.cpp b/utils/search_ivf.cpp index f1c42d4c..bef6e505 100644 --- a/utils/search_ivf.cpp +++ b/utils/search_ivf.cpp @@ -64,7 +64,8 @@ auto batch_queries( std::vector> query_batch; for 
(size_t batch = 0; batch < num_batches; ++batch) { auto this_batch = svs::threads::UnitRange{ - batch * batchsize, std::min((batch + 1) * batchsize, query_data.size())}; + batch * batchsize, std::min((batch + 1) * batchsize, query_data.size()) + }; query_batch.push_back( svs::data::SimpleData(this_batch.size(), query_data.dimensions()) ); @@ -194,7 +195,8 @@ int svs_main(std::vector&& args) { const auto dispatcher = std::map{ {{"float", "float16"}, search_index}, {{"float", "bfloat16"}, search_index}, - {{"float", "float"}, search_index}}; + {{"float", "float"}, search_index} + }; auto it = dispatcher.find({query_data_type, db_data_type}); if (it == dispatcher.end()) { From 605ea15ac4fd5ff0fefba769d149b8e78e08d99b Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 23 Oct 2025 09:34:39 +0000 Subject: [PATCH 3/7] Add comprehensive .github/copilot-instructions.md Co-authored-by: ahuber21 <9201869+ahuber21@users.noreply.github.com> --- .github/copilot-instructions.md | 313 ++++++++++++++++++++++++++++++++ 1 file changed, 313 insertions(+) create mode 100644 .github/copilot-instructions.md diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md new file mode 100644 index 00000000..ba605003 --- /dev/null +++ b/.github/copilot-instructions.md @@ -0,0 +1,313 @@ +# Copilot Instructions for ScalableVectorSearch (SVS) + +## Repository Overview + +**Scalable Vector Search (SVS)** is a high-performance C++20 library for vector similarity search, optimized for Intel x86 architectures but portable to other platforms. The library implements state-of-the-art Vamana graph-based approximate nearest neighbor (ANN) search and supports billions of high-dimensional vectors with high accuracy and speed. 
SVS features: + +- **Core language**: C++20 with modern concepts for optimal compiler optimizations +- **Can be used as**: Header-only library or with Python bindings +- **Runtime ISA dispatching**: Automatically uses best available instruction set (SSE, AVX2, AVX512) +- **Python bindings**: Require shape-specialized templates for different data dimensionalities +- **Key algorithms**: Vamana graph-based search, LVQ/LeanVec compression (proprietary, available via shared libraries) + +**Repository size**: Medium (~10k LOC core library, extensive tests and examples) +**Build system**: CMake 3.21+ with C++20 compiler (GCC 11+, Clang 15+) +**Test framework**: Catch2 v3.4.0 (unit tests), ctest (integration tests) + +## Critical Build Instructions + +### Prerequisites +- CMake 3.21 or higher +- C++20 compiler: GCC 11+ or Clang 15+ +- Optional: Intel MKL (for IVF support with `-DSVS_EXPERIMENTAL_ENABLE_IVF=ON`) +- Python 3.9+ (for bindings) + +### Standard Build Sequence (Always Follow This Order) + +**ALWAYS use an out-of-source build directory. NEVER run cmake in the repository root.** + +```bash +# 1. Create and enter build directory +mkdir -p build +cd build + +# 2. Configure with CMake (use exact flags from CI for consistency) +cmake -DCMAKE_BUILD_TYPE=RelWithDebugInfo \ + -DSVS_BUILD_BINARIES=YES \ + -DSVS_BUILD_TESTS=YES \ + -DSVS_BUILD_EXAMPLES=YES \ + -DSVS_EXPERIMENTAL_LEANVEC=YES \ + -DSVS_NO_AVX512=NO \ + -DSVS_EXPERIMENTAL_ENABLE_IVF=OFF \ + .. + +# 3. Build (typically takes 5-10 minutes on 4 cores) +make -j$(nproc) + +# 4. 
Run tests from build/tests directory +cd tests +ctest -C RelWithDebugInfo +# OR run the test executable directly with filters: +./tests "[integration][build]" +``` + +**Time expectations**: +- CMake configuration: ~18-20 seconds +- Full build (first time): ~5-10 minutes on 4 cores +- Test suite: ~1-2 minutes +- C++ examples: ~10 seconds + +**Important**: If enabling IVF support (`-DSVS_EXPERIMENTAL_ENABLE_IVF=ON`), you MUST first install Intel MKL: +```bash +# On Ubuntu (requires Intel apt repository setup) +sudo apt install intel-oneapi-mkl intel-oneapi-mkl-devel +source /opt/intel/oneapi/setvars.sh +``` + +### Common Build Options (from cmake/options.cmake) + +| Option | Default | Description | +|--------|---------|-------------| +| `SVS_BUILD_BINARIES` | OFF | Build utility binaries in utils/ | +| `SVS_BUILD_TESTS` | OFF | Build test suite (Catch2-based) | +| `SVS_BUILD_EXAMPLES` | OFF | Build C++ examples | +| `SVS_BUILD_BENCHMARK` | OFF | Build benchmark executable | +| `SVS_NO_AVX512` | OFF | Disable Intel AVX-512 intrinsics | +| `SVS_EXPERIMENTAL_ENABLE_IVF` | OFF | Enable IVF (requires MKL) | +| `CMAKE_BUILD_TYPE` | Release | Use `RelWithDebugInfo` for testing | + +**Note**: The option `SVS_EXPERIMENTAL_LEANVEC` is recognized but not used internally (safe to set). 
+ +## Code Formatting and Linting + +### Formatting (ALWAYS run before committing) + +**Tool**: clang-format version 15.x (specified in `.pre-commit-config.yaml`) +- **DO NOT** use clang-format 16+ or 14 and below - version 15.x is required + +```bash +# Format all code (run from repository root) +./tools/clang-format.sh clang-format + +# Formatted directories: bindings/python/src, bindings/python/include, +# include, benchmark, tests, utils, examples/cpp +``` + +### Pre-commit Hooks + +The repository uses pre-commit for automated formatting checks: + +```bash +# Install pre-commit (if not already installed) +pip install pre-commit + +# Install hooks (one-time setup, takes 1-2 minutes) +pre-commit install-hooks + +# Run manually (optional, CI will check) +pre-commit run --all-files +``` + +**CI check**: The `pre-commit.yml` workflow runs on all PRs and will fail if code is not formatted. + +## Testing + +### C++ Tests (Catch2) + +Tests use Catch2 v3 with prefix macros (`CATCH_TEST_CASE`, `CATCH_REQUIRE`, etc.): + +```bash +# From build/tests directory +cd build/tests + +# Run all tests +ctest -C RelWithDebugInfo +# OR +./tests + +# Run specific test tags +./tests "[integration][build]" +./tests "[core][distance]" + +# List available tags +./tests --list-tags + +# Run with verbose output +CTEST_OUTPUT_ON_FAILURE=1 ctest -C RelWithDebugInfo +``` + +**Test tags commonly used**: `[integration]`, `[build]`, `[core]`, `[distance]`, `[vamana]`, `[data]` + +### C++ Examples + +Examples are tested via ctest: + +```bash +cd build/examples/cpp +ctest -C RelWithDebugInfo +# Runs 10 example tests (~9 seconds total) +``` + +### Python Tests + +Python tests use pytest (location: `bindings/python/tests/`): + +```bash +# Build Python bindings first (requires scikit-build) +cd bindings/python +pip install -e . 
+ +# Run tests +pytest tests/ +``` + +## Project Structure + +``` +ScalableVectorSearch/ +├── .github/ +│ ├── workflows/ # CI/CD pipelines +│ │ ├── build-linux.yml # Main build & test (Ubuntu 22.04, g++/clang) +│ │ ├── pre-commit.yml # Format checking +│ │ ├── cibuildwheel.yml # Python wheel building +│ │ └── build-*.y{a}ml # macOS, ARM builds +│ └── scripts/ # CI helper scripts +├── benchmark/ # Benchmarking framework +│ ├── include/ # Benchmark headers +│ └── src/ # Benchmark implementations +├── bindings/python/ # Python API (pybind11-based) +│ ├── include/ # Python binding headers +│ ├── src/ # Binding implementations +│ ├── tests/ # Python unit tests (pytest) +│ ├── setup.py # Python package setup +│ └── pyproject.toml # Build configuration +├── cmake/ # CMake modules +│ ├── options.cmake # ** BUILD OPTIONS (IMPORTANT) ** +│ ├── multi-arch.cmake # Multi-architecture support +│ └── *.cmake # Dependency configs (eve, fmt, spdlog, etc.) +├── data/ # Test data and schemas +│ ├── test_dataset/ # Small test datasets +│ └── schemas/ # TOML schemas for serialization +├── docker/ # Docker build environments +├── examples/ +│ ├── cpp/ # C++ usage examples +│ │ ├── vamana.cpp # Main search example +│ │ ├── types.cpp # Supported types +│ │ ├── saveload.cpp # Save/load patterns +│ │ ├── dispatcher.cpp # Compile-time dispatch +│ │ └── shared/ # LVQ/LeanVec via shared library +│ └── python/ # Python examples +├── include/svs/ # ** CORE LIBRARY HEADERS ** +│ ├── lib/ # Foundation: arrays, threads, I/O, SIMD +│ ├── core/ # Core: distance, data structures, allocators +│ ├── index/ # Index implementations +│ │ ├── vamana/ # Vamana graph index +│ │ ├── flat/ # Flat (brute-force) index +│ │ └── inverted/ # Inverted index (IVF) +│ ├── orchestrators/ # High-level APIs +│ ├── quantization/ # Vector quantization +│ └── extensions/ # ISA-specific optimizations +├── tests/ # ** C++ TEST SUITE ** +│ ├── svs/ # Unit tests (mirrors include/svs/) +│ ├── integration/ # Integration tests +│ 
├── benchmark/ # Benchmark tests +│ └── utils/ # Test utilities +├── tools/ +│ ├── clang-format.sh # ** FORMATTING SCRIPT (USE THIS) ** +│ └── benchmark_inputs/ # Benchmark configurations +├── utils/ # Command-line utilities +│ ├── build_index.cpp # Index building tool +│ ├── search_index.cpp # Search tool +│ └── benchmarks/ # Benchmark runners +├── CMakeLists.txt # Main CMake configuration +├── .pre-commit-config.yaml # Pre-commit configuration +├── .clang-format # Formatting rules +└── README.md # Project documentation +``` + +## Key Files and Configurations + +| File | Purpose | +|------|---------| +| `CMakeLists.txt` | Main build configuration, version (0.0.10) | +| `cmake/options.cmake` | **All build options and flags** | +| `.pre-commit-config.yaml` | Formatting tool versions (clang-format 15) | +| `.clang-format` | Code formatting rules | +| `tools/clang-format.sh` | **Script to format all code** | +| `.github/workflows/build-linux.yml` | **Reference CI configuration** | + +## CI/CD Pipeline + +Main checks that run on every PR: + +1. **build-linux.yml**: Builds with multiple compilers (g++-11, g++-12, clang++-15) in `RelWithDebugInfo` mode, runs all C++ tests and examples +2. **pre-commit.yml**: Verifies code formatting with clang-format 15 +3. **cibuildwheel.yml**: Builds Python wheels (uses custom manylinux2014 container) + +**To replicate CI locally**: Use the exact cmake command from `build-linux.yml` (lines 70-77). + +## Common Issues and Workarounds + +### Build Issues + +1. **Problem**: CMake configuration warns about unused `SVS_EXPERIMENTAL_LEANVEC` variable + - **Solution**: This is expected and harmless - the variable is accepted but not used + +2. **Problem**: Build fails with uninitialized variable warnings on GCC 12+ + - **Solution**: Already handled - GCC 12+ adds `-Wno-uninitialized` automatically (cmake/options.cmake:208) + +3. 
**Problem**: IVF tests fail or IVF won't build + - **Solution**: IVF requires Intel MKL - either install MKL or use `-DSVS_EXPERIMENTAL_ENABLE_IVF=OFF` + +4. **Problem**: Tests timeout or take very long + - **Solution**: Integration tests can take 1-2 minutes; use specific test filters for faster iteration + +### Formatting Issues + +1. **Problem**: Pre-commit fails with wrong clang-format version + - **Solution**: Ensure clang-format 15.x is installed (not 16+) + +2. **Problem**: clang-format script fails + - **Solution**: Run from repository root: `./tools/clang-format.sh clang-format` + +## Quick Reference Commands + +```bash +# Complete build from scratch +rm -rf build && mkdir build && cd build +cmake -DCMAKE_BUILD_TYPE=RelWithDebugInfo -DSVS_BUILD_TESTS=YES -DSVS_BUILD_EXAMPLES=YES .. +make -j$(nproc) +cd tests && ./tests + +# Format code before commit +./tools/clang-format.sh clang-format + +# Run specific test subset +cd build/tests && ./tests "[integration]" + +# Check available test tags +cd build/tests && ./tests --list-tags + +# Clean and rebuild +rm -rf build && mkdir build && cd build && cmake .. && make -j$(nproc) +``` + +## Important Notes for Coding Agents + +1. **Trust these instructions first** - Only search the repository if information here is incomplete or incorrect +2. **Always build out-of-source** - Use a `build/` directory, never configure CMake in the repository root +3. **Follow the CI configuration** - Use the same cmake flags as `.github/workflows/build-linux.yml` for consistency +4. **Format before committing** - Run `./tools/clang-format.sh clang-format` to avoid CI failures +5. **Test early and often** - Build times are reasonable (~5-10 min), so test incrementally +6. **Header-only library** - Most code is in `include/svs/`, changes don't require recompiling everything +7. **ISA dispatching** - Runtime dispatch means the same binary runs on different CPU architectures +8. 
**Test filters are your friend** - Use Catch2 tags to run subsets of tests during development +9. **Python bindings are specialized** - Changes to template parameters may require Python binding updates +10. **Version is synchronized** - Keep version in sync across `CMakeLists.txt` (line 26), `setup.py` (line 43), and test files + +## Additional Resources + +- **Documentation**: https://intel.github.io/ScalableVectorSearch +- **Main README**: See repository root `README.md` for algorithm details and performance benchmarks +- **C++ Examples**: See `examples/cpp/README.md` for usage patterns +- **Test Dataset**: Small test vectors are in `data/test_dataset/` for quick validation From 540068fd6713715e50a21e417a99db0c0a18f969 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 23 Oct 2025 10:30:21 +0000 Subject: [PATCH 4/7] Revert accidental clang-format changes Co-authored-by: ahuber21 <9201869+ahuber21@users.noreply.github.com> --- benchmark/include/svs-benchmark/build.h | 3 +- .../svs-benchmark/inverted/memory/build.h | 3 +- .../svs-benchmark/inverted/memory/search.h | 6 +-- .../svs-benchmark/inverted/memory/test.h | 3 +- benchmark/include/svs-benchmark/ivf/search.h | 3 +- benchmark/include/svs-benchmark/ivf/test.h | 3 +- benchmark/include/svs-benchmark/test.h | 9 ++--- .../include/svs-benchmark/vamana/build.h | 3 +- .../include/svs-benchmark/vamana/iterator.h | 37 +++++++------------ .../include/svs-benchmark/vamana/search.h | 3 +- benchmark/include/svs-benchmark/vamana/test.h | 3 +- .../src/inverted/memory/uncompressed.cpp | 6 +-- benchmark/src/ivf/uncompressed.cpp | 18 +++------ benchmark/src/vamana/uncompressed.cpp | 21 ++++------- bindings/python/include/svs/python/common.h | 3 +- bindings/python/include/svs/python/core.h | 3 +- bindings/python/src/ivf.cpp | 6 +-- bindings/python/src/vamana.cpp | 3 +- bindings/python/src/vamana_common.cpp | 9 ++--- include/svs/concepts/data.h | 10 ++--- 
include/svs/core/allocator.h | 3 +- include/svs/core/data/simple.h | 11 +++--- include/svs/core/distance/cosine.h | 12 ++---- include/svs/core/distance/distance_core.h | 3 +- include/svs/core/kmeans.h | 3 +- include/svs/core/logging.h | 3 +- include/svs/index/flat/dynamic_flat.h | 7 ++-- include/svs/index/flat/flat.h | 10 ++--- include/svs/index/inverted/memory_based.h | 12 ++---- .../svs/index/inverted/memory_search_params.h | 3 +- include/svs/index/ivf/clustering.h | 6 +-- include/svs/index/ivf/common.h | 6 +-- include/svs/index/ivf/hierarchical_kmeans.h | 10 ++--- include/svs/index/ivf/kmeans.h | 3 +- include/svs/index/vamana/dynamic_index.h | 21 ++++------- include/svs/index/vamana/greedy_search.h | 3 +- include/svs/index/vamana/index.h | 33 ++++++----------- include/svs/index/vamana/iterator.h | 13 ++++--- include/svs/index/vamana/iterator_schedule.h | 8 ++-- include/svs/index/vamana/multi.h | 18 +++------ include/svs/index/vamana/search_params.h | 3 +- include/svs/index/vamana/vamana_build.h | 6 +-- include/svs/lib/algorithms.h | 3 +- include/svs/lib/array.h | 4 +- include/svs/lib/dispatcher.h | 3 +- include/svs/lib/invoke.h | 12 +++--- include/svs/lib/prefetch.h | 3 +- include/svs/lib/saveload.h | 12 ++++-- include/svs/lib/saveload/load.h | 7 ++-- include/svs/lib/saveload/save.h | 6 +-- include/svs/lib/threads/types.h | 19 +++++----- include/svs/orchestrators/dynamic_flat.h | 3 +- include/svs/orchestrators/dynamic_vamana.h | 7 ++-- include/svs/orchestrators/inverted.h | 6 +-- include/svs/orchestrators/vamana.h | 7 ++-- include/svs/quantization/scalar/scalar.h | 6 +-- tests/integration/vamana/scalar_iterator.cpp | 3 +- tests/svs/core/allocator.cpp | 7 ++-- tests/svs/core/data/block.cpp | 3 +- tests/svs/core/logging.cpp | 3 +- tests/svs/index/flat/dynamic_flat.cpp | 9 ++--- tests/svs/index/inverted/clustering.cpp | 3 +- tests/svs/index/vamana/dynamic_index.cpp | 6 +-- tests/svs/index/vamana/dynamic_index_2.cpp | 6 +-- tests/svs/index/vamana/index.cpp | 3 +- 
tests/svs/index/vamana/iterator.cpp | 3 +- tests/svs/index/vamana/iterator_schedule.cpp | 3 +- tests/svs/index/vamana/multi.cpp | 3 +- tests/svs/index/vamana/search_buffer.cpp | 6 +-- tests/svs/lib/array.cpp | 3 +- tests/svs/lib/dispatcher.cpp | 6 +-- tests/svs/lib/meta.cpp | 3 +- tests/svs/lib/saveload.cpp | 3 +- tests/svs/lib/threads/thread.cpp | 3 +- tests/svs/lib/threads/threadpool.cpp | 5 +-- tests/utils/schemas.cpp | 6 +-- tests/utils/test_dataset.cpp | 3 +- utils/assemble_vamana.cpp | 3 +- utils/benchmarks/index_build.cpp | 3 +- utils/build_index.cpp | 6 +-- utils/characterization/mutable.cpp | 6 +-- utils/convert_data_to_bfloat16.cpp | 3 +- utils/convert_data_to_float16.cpp | 3 +- utils/search_index.cpp | 3 +- utils/search_index_numa.cpp | 9 ++--- utils/search_ivf.cpp | 6 +-- 86 files changed, 222 insertions(+), 360 deletions(-) diff --git a/benchmark/include/svs-benchmark/build.h b/benchmark/include/svs-benchmark/build.h index 255297f5..4e08695d 100644 --- a/benchmark/include/svs-benchmark/build.h +++ b/benchmark/include/svs-benchmark/build.h @@ -325,8 +325,7 @@ Bundle, T, Q, Distance> initialize_dynamic( .index = init(vectors, indices), .reference = std::move(reference), .queries = std::move(queries), - .build_time = 0 - }; + .build_time = 0}; bundle.build_time = svs::lib::time_difference(tic); return bundle; } diff --git a/benchmark/include/svs-benchmark/inverted/memory/build.h b/benchmark/include/svs-benchmark/inverted/memory/build.h index fdd4d445..7ee47d22 100644 --- a/benchmark/include/svs-benchmark/inverted/memory/build.h +++ b/benchmark/include/svs-benchmark/inverted/memory/build.h @@ -180,8 +180,7 @@ struct MemoryBuildJob { svs::DistanceType get_distance() const { return distance_; } svs::index::inverted::InvertedBuildParameters get_build_parameters() const { return svs::index::inverted::InvertedBuildParameters{ - clustering_parameters_, primary_build_parameters_ - }; + clustering_parameters_, primary_build_parameters_}; } std::vector 
get_search_configs() const { diff --git a/benchmark/include/svs-benchmark/inverted/memory/search.h b/benchmark/include/svs-benchmark/inverted/memory/search.h index 1d2e9311..4a950933 100644 --- a/benchmark/include/svs-benchmark/inverted/memory/search.h +++ b/benchmark/include/svs-benchmark/inverted/memory/search.h @@ -118,8 +118,7 @@ struct PiecewiseAssembly { SVS_LOAD_MEMBER_AT_(table, strategy), extract_filename(table, "clustering", root), extract_filename(table, "primary_index_config", root), - extract_filename(table, "primary_index_graph", root) - }; + extract_filename(table, "primary_index_graph", root)}; } }; @@ -215,8 +214,7 @@ struct MemorySearchJob { SVS_LOAD_MEMBER_AT_(table, search_targets), extract_filename(table, "original_data", data_root), extract_filename(table, "queries", data_root), - extract_filename(table, "groundtruth", data_root) - }; + extract_filename(table, "groundtruth", data_root)}; } }; diff --git a/benchmark/include/svs-benchmark/inverted/memory/test.h b/benchmark/include/svs-benchmark/inverted/memory/test.h index 420bafe9..0db7c0bf 100644 --- a/benchmark/include/svs-benchmark/inverted/memory/test.h +++ b/benchmark/include/svs-benchmark/inverted/memory/test.h @@ -108,8 +108,7 @@ struct InvertedTest { svsbenchmark::extract_filename(table, "data_f32", root), svsbenchmark::extract_filename(table, "queries_f32", root), SVS_LOAD_MEMBER_AT_(table, queries_in_training_set), - num_threads - }; + num_threads}; } }; diff --git a/benchmark/include/svs-benchmark/ivf/search.h b/benchmark/include/svs-benchmark/ivf/search.h index e3072acc..7a0c563a 100644 --- a/benchmark/include/svs-benchmark/ivf/search.h +++ b/benchmark/include/svs-benchmark/ivf/search.h @@ -199,8 +199,7 @@ struct SearchJob { SVS_LOAD_MEMBER_AT_(table, ndims), SVS_LOAD_MEMBER_AT_(table, num_threads), SVS_LOAD_MEMBER_AT_(table, search_parameters), - SVS_LOAD_MEMBER_AT_(table, preset_parameters) - }; + SVS_LOAD_MEMBER_AT_(table, preset_parameters)}; } }; diff --git 
a/benchmark/include/svs-benchmark/ivf/test.h b/benchmark/include/svs-benchmark/ivf/test.h index 309189a1..943272f7 100644 --- a/benchmark/include/svs-benchmark/ivf/test.h +++ b/benchmark/include/svs-benchmark/ivf/test.h @@ -119,8 +119,7 @@ struct IVFTest { svsbenchmark::extract_filename(table, "graph", root), svsbenchmark::extract_filename(table, "queries_f32", root), SVS_LOAD_MEMBER_AT_(table, queries_in_training_set), - num_threads - }; + num_threads}; } }; diff --git a/benchmark/include/svs-benchmark/test.h b/benchmark/include/svs-benchmark/test.h index 9413c0ea..a8190b99 100644 --- a/benchmark/include/svs-benchmark/test.h +++ b/benchmark/include/svs-benchmark/test.h @@ -69,8 +69,7 @@ struct DistanceAndGroundtruth { ) { return DistanceAndGroundtruth{ SVS_LOAD_MEMBER_AT_(table, distance), - svsbenchmark::extract_filename(table, "path", root) - }; + svsbenchmark::extract_filename(table, "path", root)}; } }; @@ -134,8 +133,7 @@ template struct ConfigAndResultPrototype { SVS_LOAD_MEMBER_AT_(table, num_neighbors), SVS_LOAD_MEMBER_AT_(table, recall_k), SVS_LOAD_MEMBER_AT_(table, num_queries), - SVS_LOAD_MEMBER_AT_(table, recall) - }; + SVS_LOAD_MEMBER_AT_(table, recall)}; } }; @@ -216,8 +214,7 @@ struct ExpectedResultPrototype { SVS_LOAD_MEMBER_AT_(table, dataset, root), SVS_LOAD_MEMBER_AT_(table, distance), std::move(build_parameters), - SVS_LOAD_MEMBER_AT_(table, config_and_recall) - }; + SVS_LOAD_MEMBER_AT_(table, config_and_recall)}; } }; diff --git a/benchmark/include/svs-benchmark/vamana/build.h b/benchmark/include/svs-benchmark/vamana/build.h index 86e2202f..5278f96d 100644 --- a/benchmark/include/svs-benchmark/vamana/build.h +++ b/benchmark/include/svs-benchmark/vamana/build.h @@ -380,8 +380,7 @@ struct BuildJob : public BuildJobBase { load_preset(), SVS_LOAD_MEMBER_AT_(table, search_parameters), load_save_directory(), - BuildJobBase::from_toml(table, root) - }; + BuildJobBase::from_toml(table, root)}; } }; diff --git 
a/benchmark/include/svs-benchmark/vamana/iterator.h b/benchmark/include/svs-benchmark/vamana/iterator.h index b4b887d3..7cb73cab 100644 --- a/benchmark/include/svs-benchmark/vamana/iterator.h +++ b/benchmark/include/svs-benchmark/vamana/iterator.h @@ -63,8 +63,7 @@ struct IteratorSearchParameters { {SVS_LIST_SAVE_(batch_sizes), SVS_LIST_SAVE_(target_recalls), SVS_LIST_SAVE_(num_batches), - SVS_LIST_SAVE_(query_subsample)} - }; + SVS_LIST_SAVE_(query_subsample)}}; } static IteratorSearchParameters load(const svs::lib::ContextFreeLoadTable& table) { @@ -72,8 +71,7 @@ struct IteratorSearchParameters { SVS_LOAD_MEMBER_AT_(table, batch_sizes), SVS_LOAD_MEMBER_AT_(table, target_recalls), SVS_LOAD_MEMBER_AT_(table, num_batches), - SVS_LOAD_MEMBER_AT_(table, query_subsample) - }; + SVS_LOAD_MEMBER_AT_(table, query_subsample)}; } }; @@ -103,10 +101,8 @@ svsbenchmark::search::QuerySet subsample( return svsbenchmark::search::QuerySet{ svs::data::ConstSimpleDataView{queries.data(), 2 * count, queries.dimensions()}, svs::data::ConstSimpleDataView{ - groundtruth.data(), 2 * count, groundtruth.dimensions() - }, - count - }; + groundtruth.data(), 2 * count, groundtruth.dimensions()}, + count}; } struct IteratorSearch { @@ -135,8 +131,7 @@ struct IteratorSearch { .distance_ = svs::DistanceType::L2, .parameters_ = IteratorSearchParameters::example(), .query_type_ = svs::DataType::float32, - .ndims_ = Extent{svs::Dynamic} - }; + .ndims_ = Extent{svs::Dynamic}}; } // Dispatch invocation. 
@@ -178,8 +173,7 @@ struct IteratorSearch { .distance_ = SVS_LOAD_MEMBER_AT_(table, distance), .parameters_ = SVS_LOAD_MEMBER_AT_(table, parameters), .query_type_ = SVS_LOAD_MEMBER_AT_(table, query_type), - .ndims_ = SVS_LOAD_MEMBER_AT_(table, ndims) - }; + .ndims_ = SVS_LOAD_MEMBER_AT_(table, ndims)}; } }; @@ -220,8 +214,7 @@ struct YieldedResult { SVS_LIST_SAVE_(yielded), SVS_LIST_SAVE_(total_yielded), SVS_LIST_SAVE_(total_recall), - SVS_LIST_SAVE_(execution_time)} - }; + SVS_LIST_SAVE_(execution_time)}}; } }; @@ -273,8 +266,7 @@ template struct QueryIteratorResult { SVS_LIST_SAVE_(num_batches), SVS_LIST_SAVE_(target_recall), SVS_LIST_SAVE_(report), - SVS_LIST_SAVE_(results)} - }; + SVS_LIST_SAVE_(results)}}; } }; @@ -380,8 +372,7 @@ std::vector> tune_and_search_iterator( .yielded_ = iterator.size(), .total_yielded_ = total_yielded, .total_recall_ = recall, - .execution_time_ = execution_time - }; + .execution_time_ = execution_time}; }; // Now that we have the baseline, obtain iterator based results. @@ -444,12 +435,10 @@ toml::table tune_and_search_iterator( // Use a helper lambda to save the results. // This lambda can be reused when generating the final ``toml::table`` to ensure the // layout is the same. 
- auto serialize_results = - [&](const std::vector>& results_so_far) { - return toml::table{ - {"job", toml_base}, {"results", svs::lib::save(results_so_far)} - }; - }; + auto serialize_results = [&](const std::vector>& + results_so_far) { + return toml::table{{"job", toml_base}, {"results", svs::lib::save(results_so_far)}}; + }; auto do_checkpoint = [&](const std::vector>& results_so_far ) { diff --git a/benchmark/include/svs-benchmark/vamana/search.h b/benchmark/include/svs-benchmark/vamana/search.h index b8efc9c3..31bba634 100644 --- a/benchmark/include/svs-benchmark/vamana/search.h +++ b/benchmark/include/svs-benchmark/vamana/search.h @@ -210,8 +210,7 @@ struct SearchJob { SVS_LOAD_MEMBER_AT_(table, ndims), SVS_LOAD_MEMBER_AT_(table, num_threads), SVS_LOAD_MEMBER_AT_(table, search_parameters), - SVS_LOAD_MEMBER_AT_(table, preset_parameters) - }; + SVS_LOAD_MEMBER_AT_(table, preset_parameters)}; } }; diff --git a/benchmark/include/svs-benchmark/vamana/test.h b/benchmark/include/svs-benchmark/vamana/test.h index c9520a44..18dc0614 100644 --- a/benchmark/include/svs-benchmark/vamana/test.h +++ b/benchmark/include/svs-benchmark/vamana/test.h @@ -118,8 +118,7 @@ struct VamanaTest { svsbenchmark::extract_filename(table, "graph", root), svsbenchmark::extract_filename(table, "queries_f32", root), SVS_LOAD_MEMBER_AT_(table, queries_in_training_set), - num_threads - }; + num_threads}; } }; diff --git a/benchmark/src/inverted/memory/uncompressed.cpp b/benchmark/src/inverted/memory/uncompressed.cpp index 05c3cca5..85fe09c8 100644 --- a/benchmark/src/inverted/memory/uncompressed.cpp +++ b/benchmark/src/inverted/memory/uncompressed.cpp @@ -220,8 +220,7 @@ svsbenchmark::TestFunctionReturn test_build(const InvertedTest& job) { index, build_job, search::QuerySet{ - std::move(queries), std::move(groundtruth), job.queries_in_training_set_ - }, + std::move(queries), std::move(groundtruth), job.queries_in_training_set_}, svsbenchmark::BuildTime{build_time}, 
svsbenchmark::Placeholder{} ); @@ -229,8 +228,7 @@ svsbenchmark::TestFunctionReturn test_build(const InvertedTest& job) { return svsbenchmark::TestFunctionReturn{ .key_ = "inverted_test_build", .results_ = - svs::lib::save_to_table(memory::ExpectedResult(std::move(kind), results)) - }; + svs::lib::save_to_table(memory::ExpectedResult(std::move(kind), results))}; } } // namespace diff --git a/benchmark/src/ivf/uncompressed.cpp b/benchmark/src/ivf/uncompressed.cpp index 8b31dcad..2ddc7cfd 100644 --- a/benchmark/src/ivf/uncompressed.cpp +++ b/benchmark/src/ivf/uncompressed.cpp @@ -144,8 +144,7 @@ svsbenchmark::TestFunctionReturn test_search(const IVFTest& job) { Extent(svs::Dynamic), job.num_threads_, test_search_parameters(), - test_search_configs() - }; + test_search_configs()}; // Load the components for the test. auto tic = svs::lib::now(); @@ -165,16 +164,14 @@ svsbenchmark::TestFunctionReturn test_search(const IVFTest& job) { index, search_job, svsbenchmark::search::QuerySet{ - std::move(queries), std::move(groundtruth), job.queries_in_training_set_ - }, + std::move(queries), std::move(groundtruth), job.queries_in_training_set_}, svsbenchmark::LoadTime{load_time}, svsbenchmark::Placeholder{} ); return TestFunctionReturn{ .key_ = "ivf_test_search", - .results_ = svs::lib::save_to_table(ivf::ExpectedResult(std::move(kind), results)) - }; + .results_ = svs::lib::save_to_table(ivf::ExpectedResult(std::move(kind), results))}; } template @@ -204,8 +201,7 @@ svsbenchmark::TestFunctionReturn test_build(const IVFTest& job) { svs::distance_type_v, Extent(svs::Dynamic), build_parameters, - job.num_threads_ - }; + job.num_threads_}; // Load the components for the test. 
auto data = svsbenchmark::convert_data( @@ -227,16 +223,14 @@ svsbenchmark::TestFunctionReturn test_build(const IVFTest& job) { index, build_job, svsbenchmark::search::QuerySet{ - std::move(queries), std::move(groundtruth), job.queries_in_training_set_ - }, + std::move(queries), std::move(groundtruth), job.queries_in_training_set_}, svsbenchmark::BuildTime{build_time}, svsbenchmark::Placeholder{} ); return TestFunctionReturn{ .key_ = "ivf_test_build", - .results_ = svs::lib::save_to_table(ivf::ExpectedResult(std::move(kind), results)) - }; + .results_ = svs::lib::save_to_table(ivf::ExpectedResult(std::move(kind), results))}; } } // namespace diff --git a/benchmark/src/vamana/uncompressed.cpp b/benchmark/src/vamana/uncompressed.cpp index b40e5f92..db88efb5 100644 --- a/benchmark/src/vamana/uncompressed.cpp +++ b/benchmark/src/vamana/uncompressed.cpp @@ -244,8 +244,7 @@ svsbenchmark::TestFunctionReturn test_search(const VamanaTest& job) { Extent(svs::Dynamic), job.num_threads_, test_search_parameters(), - test_search_configs() - }; + test_search_configs()}; // Load the components for the test. 
auto tic = svs::lib::now(); @@ -269,8 +268,7 @@ svsbenchmark::TestFunctionReturn test_search(const VamanaTest& job) { index, search_job, svsbenchmark::search::QuerySet{ - std::move(queries), std::move(groundtruth), job.queries_in_training_set_ - }, + std::move(queries), std::move(groundtruth), job.queries_in_training_set_}, svsbenchmark::LoadTime{load_time}, IndexTraits::test_generation_optimization() ); @@ -278,8 +276,7 @@ svsbenchmark::TestFunctionReturn test_search(const VamanaTest& job) { return TestFunctionReturn{ .key_ = "vamana_test_search", .results_ = - svs::lib::save_to_table(vamana::ExpectedResult(std::move(kind), results)) - }; + svs::lib::save_to_table(vamana::ExpectedResult(std::move(kind), results))}; } template @@ -289,8 +286,7 @@ svsbenchmark::TestFunctionReturn test_build(const VamanaTest& job) { const auto& groundtruth_path = job.groundtruth_for(distance); auto build_parameters = svs::index::vamana::VamanaBuildParameters{ - pick_alpha(distance), 32, 100, 250, 28, true - }; + pick_alpha(distance), 32, 100, 250, 28, true}; auto kind = svsbenchmark::Uncompressed(svs::datatype_v); @@ -310,8 +306,7 @@ svsbenchmark::TestFunctionReturn test_build(const VamanaTest& job) { svs::distance_type_v, Extent(svs::Dynamic), build_parameters, - job.num_threads_ - }; + job.num_threads_}; // Load the components for the test. 
auto tic = svs::lib::now(); @@ -331,8 +326,7 @@ svsbenchmark::TestFunctionReturn test_build(const VamanaTest& job) { index, build_job, svsbenchmark::search::QuerySet{ - std::move(queries), std::move(groundtruth), job.queries_in_training_set_ - }, + std::move(queries), std::move(groundtruth), job.queries_in_training_set_}, svsbenchmark::BuildTime{build_time}, IndexTraits::test_generation_optimization() ); @@ -340,8 +334,7 @@ svsbenchmark::TestFunctionReturn test_build(const VamanaTest& job) { return TestFunctionReturn{ .key_ = "vamana_test_build", .results_ = - svs::lib::save_to_table(vamana::ExpectedResult(std::move(kind), results)) - }; + svs::lib::save_to_table(vamana::ExpectedResult(std::move(kind), results))}; } } // namespace diff --git a/bindings/python/include/svs/python/common.h b/bindings/python/include/svs/python/common.h index c4aacd47..86817f0d 100644 --- a/bindings/python/include/svs/python/common.h +++ b/bindings/python/include/svs/python/common.h @@ -186,8 +186,7 @@ matrix_view(pybind11::array_t& data) { template pybind11::array_t numpy_vector(size_t s) { return pybind11::array_t{ - {svs::lib::narrow(s)} - }; + {svs::lib::narrow(s)}}; } /// diff --git a/bindings/python/include/svs/python/core.h b/bindings/python/include/svs/python/core.h index 1127778d..50864281 100644 --- a/bindings/python/include/svs/python/core.h +++ b/bindings/python/include/svs/python/core.h @@ -57,8 +57,7 @@ struct AnonymousVectorData { : array_{ array.template unchecked<2>().data(0, 0), svs::lib::narrow(array.shape(0)), - svs::lib::narrow(array.shape(1)) - } {} + svs::lib::narrow(array.shape(1))} {} // Interface. 
svs::DataType type() const { return array_.type(); } diff --git a/bindings/python/src/ivf.cpp b/bindings/python/src/ivf.cpp index 507b989f..06a651fe 100644 --- a/bindings/python/src/ivf.cpp +++ b/bindings/python/src/ivf.cpp @@ -516,8 +516,7 @@ void wrap(py::module& m) { is_hierarchical, training_fraction, hierarchical_level1_clusters, - seed - }; + seed}; }), py::arg("num_centroids") = 1000, py::arg("minibatch_size") = 10'000, @@ -560,8 +559,7 @@ void wrap(py::module& m) { Args: n_probes: The number of nearest clusters to be explored k_reorder: Level of reordering or reranking done when using compressed datasets - )" - }; + )"}; params .def(py::init(), py::arg("n_probes") = 1, py::arg("k_reorder") = 1.0) diff --git a/bindings/python/src/vamana.cpp b/bindings/python/src/vamana.cpp index c31df3fd..6b4c1c75 100644 --- a/bindings/python/src/vamana.cpp +++ b/bindings/python/src/vamana.cpp @@ -428,8 +428,7 @@ void wrap(py::module& m) { window_size, max_candidate_pool_size, prune_to, - use_full_search_history - }; + use_full_search_history}; }), py::arg("alpha") = svs::FLOAT_PLACEHOLDER, py::arg("graph_max_degree") = svs::VAMANA_GRAPH_MAX_DEGREE_DEFAULT, diff --git a/bindings/python/src/vamana_common.cpp b/bindings/python/src/vamana_common.cpp index ef4812e8..f23688ee 100644 --- a/bindings/python/src/vamana_common.cpp +++ b/bindings/python/src/vamana_common.cpp @@ -86,8 +86,7 @@ See also: :py:class:`svs.VamanaSearchParameters`, that will be used to determine stopping conditions for graph search. search_buffer_capacity (int, read-only): The (expected) number of valid entries that will be available. Must be at least as large as `search_window_size`. -)" - }; +)"}; config.def(py::init<>()) .def( @@ -139,8 +138,7 @@ See also: :py:class:`Vamana.search_parameters`. Setting either ``prefetch_lookahead`` or ``prefetch_step`` to zero disables candidate prefetching during search. 
- )" - }; + )"}; // N.B.: Keep defaults the same as the C++ class params @@ -222,8 +220,7 @@ See also: :py:meth:`Vamana.experimental_calibrate` and train_prefetchers (bool): Flag to train prefetch parameters. use_existing_parameter_values (bool): Should optimization use existing search parameters or should it use defaults instead. -)" - }; +)"}; // N.B.: Keep defaults the same as the C++ class params.def(py::init<>(), "Instantiate with default parameters.") diff --git a/include/svs/concepts/data.h b/include/svs/concepts/data.h index 4ae5b805..eb989a50 100644 --- a/include/svs/concepts/data.h +++ b/include/svs/concepts/data.h @@ -186,8 +186,8 @@ void copy(const Input& input, Output& output) { struct GetDatumAccessor { template - SVS_FORCE_INLINE auto - operator()(const Data& data, I i) const -> decltype(data.get_datum(i)) { + SVS_FORCE_INLINE auto operator()(const Data& data, I i) const + -> decltype(data.get_datum(i)) { return data.get_datum(i); } @@ -199,9 +199,9 @@ struct GetDatumAccessor { template concept AccessorFor = requires(Accessor& accessor, const Data& data, size_t i) { - accessor(data, i); - accessor.prefetch(data, i); -}; + accessor(data, i); + accessor.prefetch(data, i); + }; } // namespace data } // namespace svs diff --git a/include/svs/core/allocator.h b/include/svs/core/allocator.h index f1f9c460..1e449a7e 100644 --- a/include/svs/core/allocator.h +++ b/include/svs/core/allocator.h @@ -98,8 +98,7 @@ static constexpr std::array hugepage_x86_options{ }; #else static constexpr std::array hugepage_x86_options{ - HugepageX86Parameters{1 << 12, 0} -}; + HugepageX86Parameters{1 << 12, 0}}; #endif // __linux__ namespace detail { diff --git a/include/svs/core/data/simple.h b/include/svs/core/data/simple.h index dbd1a67f..df0a45c3 100644 --- a/include/svs/core/data/simple.h +++ b/include/svs/core/data/simple.h @@ -167,8 +167,7 @@ struct Matcher { ) { auto matcher = Matcher{ .eltype = SVS_LOAD_MEMBER_AT(table, eltype), - .dims = SVS_LOAD_MEMBER_AT(table, 
dims) - }; + .dims = SVS_LOAD_MEMBER_AT(table, dims)}; // Perform a sanity check on the arguments. if (type_hint != DataType::undef && type_hint != matcher.eltype) { @@ -482,8 +481,9 @@ class SimpleData { template requires(!is_const) - void - compact(std::span new_to_old, Pool& threadpool, size_t batchsize = 1'000'000) { + void compact( + std::span new_to_old, Pool& threadpool, size_t batchsize = 1'000'000 + ) { // Allocate scratch space. batchsize = std::min(batchsize, size()); auto buffer = data::SimpleData(batchsize, dimensions()); @@ -503,8 +503,7 @@ class SimpleData { if (forced || new_size > capacity()) { auto new_data = array_type{ svs::make_dims(new_size, lib::forward_extent(dimensions())), - get_allocator() - }; + get_allocator()}; // Copy our contents into the new array. // Since the backing array is dense, we can use `memcpy`. diff --git a/include/svs/core/distance/cosine.h b/include/svs/core/distance/cosine.h index 6f2d67f6..9f492499 100644 --- a/include/svs/core/distance/cosine.h +++ b/include/svs/core/distance/cosine.h @@ -237,15 +237,13 @@ template <> struct CosineFloatOp<16> : public svs::simd::ConvertToFloat<16> { static Pair accumulate(Pair accumulator, __m512 a, __m512 b) { return { - _mm512_fmadd_ps(a, b, accumulator.op), _mm512_fmadd_ps(b, b, accumulator.norm) - }; + _mm512_fmadd_ps(a, b, accumulator.op), _mm512_fmadd_ps(b, b, accumulator.norm)}; } static Pair accumulate(mask_t m, Pair accumulator, __m512 a, __m512 b) { return { _mm512_mask3_fmadd_ps(a, b, accumulator.op, m), - _mm512_mask3_fmadd_ps(b, b, accumulator.norm, m) - }; + _mm512_mask3_fmadd_ps(b, b, accumulator.norm, m)}; } static Pair combine(Pair x, Pair y) { @@ -410,15 +408,13 @@ template <> struct CosineFloatOp<8> : public svs::simd::ConvertToFloat<8> { static Pair accumulate(Pair accumulator, __m256 a, __m256 b) { return { - _mm256_fmadd_ps(a, b, accumulator.op), _mm256_fmadd_ps(b, b, accumulator.norm) - }; + _mm256_fmadd_ps(a, b, accumulator.op), _mm256_fmadd_ps(b, b, 
accumulator.norm)}; } static Pair accumulate(mask_t /*m*/, Pair accumulator, __m256 a, __m256 b) { // For AVX2, masking is handled in the load operations return { - _mm256_fmadd_ps(a, b, accumulator.op), _mm256_fmadd_ps(b, b, accumulator.norm) - }; + _mm256_fmadd_ps(a, b, accumulator.op), _mm256_fmadd_ps(b, b, accumulator.norm)}; } static Pair combine(Pair x, Pair y) { diff --git a/include/svs/core/distance/distance_core.h b/include/svs/core/distance/distance_core.h index acf4a679..4f59f9de 100644 --- a/include/svs/core/distance/distance_core.h +++ b/include/svs/core/distance/distance_core.h @@ -29,8 +29,7 @@ namespace svs::distance { enum class AVX_AVAILABILITY { NONE, AVX2, AVX512 }; constexpr std::array supported_dim_list{ - 64, 96, 100, 128, 160, 200, 512, 768, svs::Dynamic -}; + 64, 96, 100, 128, 160, 200, 512, 768, svs::Dynamic}; template constexpr bool is_dim_supported() { for (auto i : supported_dim_list) { diff --git a/include/svs/core/kmeans.h b/include/svs/core/kmeans.h index 0efef457..f4f80e7f 100644 --- a/include/svs/core/kmeans.h +++ b/include/svs/core/kmeans.h @@ -191,8 +191,7 @@ data::SimpleData train_impl( for (size_t batch = 0; batch < num_batches; ++batch) { auto batch_timer = timer.push_back("mini batch"); auto this_batch = threads::UnitRange{ - batch * batchsize, std::min((batch + 1) * batchsize, data.size()) - }; + batch * batchsize, std::min((batch + 1) * batchsize, data.size())}; process_batch( data::make_const_view(data, this_batch), centroids, diff --git a/include/svs/core/logging.h b/include/svs/core/logging.h index 01396d52..bb885b63 100644 --- a/include/svs/core/logging.h +++ b/include/svs/core/logging.h @@ -60,8 +60,7 @@ inline constexpr std::array all_levels = { Level::Warn, Level::Error, Level::Critical, - Level::Off -}; + Level::Off}; /// @brief The type of the global logger. 
using logger_ptr = std::shared_ptr<::spdlog::logger>; diff --git a/include/svs/index/flat/dynamic_flat.h b/include/svs/index/flat/dynamic_flat.h index b65f3fd1..5a83fe22 100644 --- a/include/svs/index/flat/dynamic_flat.h +++ b/include/svs/index/flat/dynamic_flat.h @@ -573,14 +573,13 @@ template class DynamicFlatIndex { threads::parallel_for( threadpool_, threads::DynamicPartition{ - queries.size(), compute_query_batch_size(search_parameters, queries.size()) - }, + queries.size(), + compute_query_batch_size(search_parameters, queries.size())}, [&](const auto& query_indices, uint64_t /*tid*/) { // Broadcast the distance functor so each thread can process all queries // in its current batch. distance::BroadcastDistance distances{ - extensions::distance(data_, distance_), query_indices.size() - }; + extensions::distance(data_, distance_), query_indices.size()}; search_patch( queries, diff --git a/include/svs/index/flat/flat.h b/include/svs/index/flat/flat.h index 194e642d..187fc744 100644 --- a/include/svs/index/flat/flat.h +++ b/include/svs/index/flat/flat.h @@ -388,14 +388,13 @@ class FlatIndex { threads::parallel_for( threadpool_, threads::DynamicPartition{ - queries.size(), compute_query_batch_size(search_parameters, queries.size()) - }, + queries.size(), + compute_query_batch_size(search_parameters, queries.size())}, [&](const auto& query_indices, uint64_t /*tid*/) { // Broadcast the distance functor so each thread can process all queries // in its current batch. 
distance::BroadcastDistance distances{ - extensions::distance(data_, distance_), query_indices.size() - }; + extensions::distance(data_, distance_), query_indices.size()}; search_patch( queries, @@ -580,8 +579,7 @@ template temporary_flat_index(Data& data, Dist distance, ThreadPoolProto threadpool_proto) { return TemporaryFlatIndex{ - data, distance, threads::as_threadpool(std::move(threadpool_proto)) - }; + data, distance, threads::as_threadpool(std::move(threadpool_proto))}; } } // namespace svs::index::flat diff --git a/include/svs/index/inverted/memory_based.h b/include/svs/index/inverted/memory_based.h index d7e430fd..3d3fc24c 100644 --- a/include/svs/index/inverted/memory_based.h +++ b/include/svs/index/inverted/memory_based.h @@ -70,8 +70,7 @@ template class SparseClusteredDatase const Original& original, const Clustering& clustering, const Alloc& allocator ) : SparseClusteredDataset{ - original, clustering, clustering.packed_leaf_translation(), allocator - } {} + original, clustering, clustering.packed_leaf_translation(), allocator} {} template SparseClusteredDataset( @@ -96,8 +95,7 @@ template class SparseClusteredDatase for (auto neighbor : cluster) { auto global_id = neighbor.id(); these_ids.at(i) = SparseIDs{ - .local = global_to_local_map.at(global_id), .global = global_id - }; + .local = global_to_local_map.at(global_id), .global = global_id}; ++i; } }); @@ -396,8 +394,7 @@ template class InvertedIndex { ///// Search Parameter Setting search_parameters_type get_search_parameters() const { return InvertedSearchParameters{ - index_.get_search_parameters(), refinement_epsilon_ - }; + index_.get_search_parameters(), refinement_epsilon_}; } void set_search_parameters(const search_parameters_type& parameters) { @@ -607,8 +604,7 @@ auto auto_build( strategy(data, clustering, HugepageAllocator()), std::move(centroids), std::move(primary_threadpool), - std::move(logger) - }; + std::move(logger)}; } ///// Auto Assembling. 
diff --git a/include/svs/index/inverted/memory_search_params.h b/include/svs/index/inverted/memory_search_params.h index 8b6557cc..0d867fa2 100644 --- a/include/svs/index/inverted/memory_search_params.h +++ b/include/svs/index/inverted/memory_search_params.h @@ -54,8 +54,7 @@ struct InvertedSearchParameters { static InvertedSearchParameters load(const lib::ContextFreeLoadTable& table) { return InvertedSearchParameters{ SVS_LOAD_MEMBER_AT_(table, primary_parameters), - SVS_LOAD_MEMBER_AT_(table, refinement_epsilon) - }; + SVS_LOAD_MEMBER_AT_(table, refinement_epsilon)}; } constexpr friend bool diff --git a/include/svs/index/ivf/clustering.h b/include/svs/index/ivf/clustering.h index b70989cd..8f055517 100644 --- a/include/svs/index/ivf/clustering.h +++ b/include/svs/index/ivf/clustering.h @@ -239,8 +239,7 @@ template class Clustering { if (saved_data_type != datatype_v) { auto centroids_orig = lib::load_at>(table, "centroids"); - if constexpr (std::is_same_v || - std::is_same_v) { + if constexpr (std::is_same_v || std::is_same_v) { auto centroids = convert_data(centroids_orig, threadpool); return Clustering{centroids, deserialize_clusters(io)}; } else { @@ -249,8 +248,7 @@ template class Clustering { } return Clustering{ - SVS_LOAD_MEMBER_AT_(table, centroids), deserialize_clusters(io) - }; + SVS_LOAD_MEMBER_AT_(table, centroids), deserialize_clusters(io)}; } }; diff --git a/include/svs/index/ivf/common.h b/include/svs/index/ivf/common.h index 4bd9863f..28f12151 100644 --- a/include/svs/index/ivf/common.h +++ b/include/svs/index/ivf/common.h @@ -160,8 +160,7 @@ struct IVFSearchParameters { static IVFSearchParameters load(const lib::ContextFreeLoadTable& table) { return IVFSearchParameters{ - SVS_LOAD_MEMBER_AT_(table, n_probes), SVS_LOAD_MEMBER_AT_(table, k_reorder) - }; + SVS_LOAD_MEMBER_AT_(table, n_probes), SVS_LOAD_MEMBER_AT_(table, k_reorder)}; } constexpr friend bool @@ -536,8 +535,7 @@ auto kmeans_training( for (size_t batch = 0; batch < num_batches; ++batch) 
{ auto this_batch = threads::UnitRange{ - batch * batchsize, std::min((batch + 1) * batchsize, data.size()) - }; + batch * batchsize, std::min((batch + 1) * batchsize, data.size())}; auto data_batch = data::make_view(data, this_batch); centroid_assignment( data_batch, diff --git a/include/svs/index/ivf/hierarchical_kmeans.h b/include/svs/index/ivf/hierarchical_kmeans.h index 2afbf5d9..16854086 100644 --- a/include/svs/index/ivf/hierarchical_kmeans.h +++ b/include/svs/index/ivf/hierarchical_kmeans.h @@ -139,8 +139,7 @@ auto hierarchical_kmeans_clustering_impl( for (size_t batch = 0; batch < num_batches; ++batch) { auto this_batch = threads::UnitRange{ - batch * batchsize, std::min((batch + 1) * batchsize, data_train.size()) - }; + batch * batchsize, std::min((batch + 1) * batchsize, data_train.size())}; auto data_batch = data::make_view(data_train, this_batch); centroid_assignment( data_batch, @@ -171,8 +170,7 @@ auto hierarchical_kmeans_clustering_impl( auto data_batch = data::SimpleData{batchsize, ndims}; for (size_t batch = 0; batch < num_batches; ++batch) { auto this_batch = threads::UnitRange{ - batch * batchsize, std::min((batch + 1) * batchsize, data.size()) - }; + batch * batchsize, std::min((batch + 1) * batchsize, data.size())}; auto data_batch_view = data::make_view(data, this_batch); auto all_assignments_convert = timer.push_back("level1 all assignments convert"); convert_data(data_batch_view, data_batch, threadpool); @@ -277,8 +275,8 @@ auto hierarchical_kmeans_clustering_impl( maybe_compute_norms(centroids_level2_fp32, threadpool); for (size_t batch = 0; batch < num_batches; ++batch) { auto this_batch = threads::UnitRange{ - batch * batchsize, std::min((batch + 1) * batchsize, num_assignments_l2_all) - }; + batch * batchsize, + std::min((batch + 1) * batchsize, num_assignments_l2_all)}; auto data_batch = data::make_view(data_level2, this_batch); centroid_assignment( data_batch, diff --git a/include/svs/index/ivf/kmeans.h 
b/include/svs/index/ivf/kmeans.h index 46cdfb6f..c29d5c7f 100644 --- a/include/svs/index/ivf/kmeans.h +++ b/include/svs/index/ivf/kmeans.h @@ -86,8 +86,7 @@ auto kmeans_clustering_impl( auto data_batch = data::SimpleData{batchsize, ndims}; for (size_t batch = 0; batch < num_batches; ++batch) { auto this_batch = threads::UnitRange{ - batch * batchsize, std::min((batch + 1) * batchsize, data.size()) - }; + batch * batchsize, std::min((batch + 1) * batchsize, data.size())}; auto data_batch_view = data::make_view(data, this_batch); convert_data(data_batch_view, data_batch, threadpool); centroid_assignment( diff --git a/include/svs/index/vamana/dynamic_index.h b/include/svs/index/vamana/dynamic_index.h index 8fe0fa65..169be199 100644 --- a/include/svs/index/vamana/dynamic_index.h +++ b/include/svs/index/vamana/dynamic_index.h @@ -304,8 +304,7 @@ class MutableVamanaIndex { sp.search_buffer_visited_set_ ), extensions::single_search_setup(data_, distance_), - {sp.prefetch_lookahead_, sp.prefetch_step_} - }; + {sp.prefetch_lookahead_, sp.prefetch_step_}}; } scratchspace_type scratchspace() const { return scratchspace(get_search_parameters()); } @@ -513,8 +512,7 @@ class MutableVamanaIndex { search_buffer_type{sp.buffer_config_, distance::comparator(distance_)}; auto prefetch_parameters = GreedySearchPrefetchParameters{ - sp.prefetch_lookahead_, sp.prefetch_step_ - }; + sp.prefetch_lookahead_, sp.prefetch_step_}; // Legalize search buffer for this search. 
if (buffer.target_capacity() < num_neighbors) { @@ -698,8 +696,7 @@ class MutableVamanaIndex { construction_window_size_, max_candidates_, prune_to_, - use_full_search_history_ - }; + use_full_search_history_}; auto sp = get_search_parameters(); auto prefetch_parameters = @@ -712,8 +709,7 @@ class MutableVamanaIndex { threadpool_, prefetch_parameters, logger_, - logging::Level::Trace - }; + logging::Level::Trace}; builder.construct(alpha_, entry_point(), slots, logging::Level::Trace, logger_); // Mark all added entries as valid. for (const auto& i : slots) { @@ -1015,8 +1011,7 @@ class MutableVamanaIndex { get_max_candidates(), prune_to_, get_full_search_history()}, - get_search_parameters() - }; + get_search_parameters()}; return lib::SaveTable( "vamana_dynamic_auxiliary_parameters", @@ -1330,8 +1325,7 @@ struct VamanaStateLoader { if (debug_load_from_static) { return VamanaStateLoader{ lib::load(table), - IDTranslator::Identity(assume_datasize) - }; + IDTranslator::Identity(assume_datasize)}; } return VamanaStateLoader{ @@ -1432,8 +1426,7 @@ auto auto_dynamic_assemble( std::move(distance), std::move(translator), std::move(threadpool), - std::move(logger) - }; + std::move(logger)}; } } // namespace svs::index::vamana diff --git a/include/svs/index/vamana/greedy_search.h b/include/svs/index/vamana/greedy_search.h index b97c01dc..f12c0129 100644 --- a/include/svs/index/vamana/greedy_search.h +++ b/include/svs/index/vamana/greedy_search.h @@ -166,8 +166,7 @@ void greedy_search( auto prefetcher = lib::make_prefetcher( lib::PrefetchParameters{ - prefetch_parameters.lookahead, prefetch_parameters.step - }, + prefetch_parameters.lookahead, prefetch_parameters.step}, num_neighbors, [&](size_t i) { accessor.prefetch(dataset, neighbors[i]); }, [&](size_t i) { diff --git a/include/svs/index/vamana/index.h b/include/svs/index/vamana/index.h index b604d7d1..b7c13664 100644 --- a/include/svs/index/vamana/index.h +++ b/include/svs/index/vamana/index.h @@ -143,16 +143,13 @@ 
struct VamanaIndexParameters { lib::load_at(table, "construction_window_size"), lib::load_at(table, "max_candidates"), prune_to, - use_full_search_history - }, + use_full_search_history}, VamanaSearchParameters{ - SearchBufferConfig{lib::load_at(table, "default_search_window_size") - }, + SearchBufferConfig{ + lib::load_at(table, "default_search_window_size")}, lib::load_at(table, "visited_set"), 4, - 1 - } - }; + 1}}; } static VamanaIndexParameters load(const lib::ContextFreeLoadTable& table) { @@ -409,8 +406,7 @@ class VamanaIndex { entry_point, std::move(distance_function), std::move(threadpool), - logger - } { + logger} { if (graph_.n_nodes() != data_.size()) { throw ANNEXCEPTION("Wrong sizes!"); } @@ -454,8 +450,7 @@ class VamanaIndex { sp.search_buffer_visited_set_ ), extensions::single_search_setup(data_, distance_), - {sp.prefetch_lookahead_, sp.prefetch_step_} - }; + {sp.prefetch_lookahead_, sp.prefetch_step_}}; } /// @brief Return scratch-space resources for external threading with default parameters @@ -574,12 +569,11 @@ class VamanaIndex { auto search_buffer = search_buffer_type{ SearchBufferConfig(search_parameters.buffer_config_), distance::comparator(distance_), - search_parameters.search_buffer_visited_set_ - }; + search_parameters.search_buffer_visited_set_}; auto prefetch_parameters = GreedySearchPrefetchParameters{ - search_parameters.prefetch_lookahead_, search_parameters.prefetch_step_ - }; + search_parameters.prefetch_lookahead_, + search_parameters.prefetch_step_}; // Increase the search window size if the defaults are not suitable for the // requested number of neighbors. @@ -810,8 +804,7 @@ class VamanaIndex { ) const { // Construct and save runtime parameters. 
auto parameters = VamanaIndexParameters{ - entry_point_.front(), build_parameters_, get_search_parameters() - }; + entry_point_.front(), build_parameters_, get_search_parameters()}; // Config lib::save_to_disk(parameters, config_directory); @@ -958,8 +951,7 @@ auto auto_build( lib::narrow(entry_point), std::move(distance), std::move(threadpool), - logger - }; + logger}; } /// @@ -1008,8 +1000,7 @@ auto auto_assemble( I{}, std::move(distance), std::move(threadpool), - std::move(logger) - }; + std::move(logger)}; auto config = lib::load_from_disk(config_path); index.apply(config); return index; diff --git a/include/svs/index/vamana/iterator.h b/include/svs/index/vamana/iterator.h index 4c39d9db..1d4ef788 100644 --- a/include/svs/index/vamana/iterator.h +++ b/include/svs/index/vamana/iterator.h @@ -48,8 +48,8 @@ template struct RestartInitializer { ) const { // Restart the search from scratch if requested. if (hard_restart_) { - vamana::EntryPointInitializer{entry_points_ - }(buffer, computer, graph, builder, tracker); + vamana::EntryPointInitializer{ + entry_points_}(buffer, computer, graph, builder, tracker); return; } @@ -190,8 +190,7 @@ template class BatchIterator { template svs::Neighbor adapt(N internal) const { if constexpr (Index::needs_id_translation) { return Neighbor{ - parent_->translate_internal_id(internal.id()), internal.distance() - }; + parent_->translate_internal_id(internal.id()), internal.distance()}; } else { return internal; } @@ -237,8 +236,10 @@ template class BatchIterator { auto& buffer = scratchspace_.buffer; auto& prefetch = scratchspace_.prefetch_parameters; return VamanaSearchParameters{ - buffer.config(), buffer.visited_set_enabled(), prefetch.lookahead, prefetch.step - }; + buffer.config(), + buffer.visited_set_enabled(), + prefetch.lookahead, + prefetch.step}; } /// @brief Prepares the next batch of neighbors (up to ``batch_size``) from the index. 
diff --git a/include/svs/index/vamana/iterator_schedule.h b/include/svs/index/vamana/iterator_schedule.h index 0036d744..0a2ff9f4 100644 --- a/include/svs/index/vamana/iterator_schedule.h +++ b/include/svs/index/vamana/iterator_schedule.h @@ -179,8 +179,7 @@ class LinearSchedule { lib::narrow_cast(batchsize), enable_filter_after, lib::narrow_cast(batchsize), - uint16_t{0} - } {} + uint16_t{0}} {} /// @brief Update the search buffer scaling parameters. /// @@ -359,8 +358,9 @@ class AbstractIteratorSchedule { /// arguments to the class's constructor. template AbstractIteratorSchedule(std::in_place_type_t SVS_UNUSED(tag), Args&&... args) - : iface_{std::make_unique>(std::in_place, SVS_FWD(args)...) - } {} + : iface_{std::make_unique>( + std::in_place, SVS_FWD(args)... + )} {} /// @brief Replace the wrapped schedule with a new schedule. template void reset(Schedule schedule) { diff --git a/include/svs/index/vamana/multi.h b/include/svs/index/vamana/multi.h index eea2559f..2df6be9c 100644 --- a/include/svs/index/vamana/multi.h +++ b/include/svs/index/vamana/multi.h @@ -609,8 +609,7 @@ class MultiMutableVamanaIndex { get_max_candidates(), get_prune_to(), get_full_search_history()}, - get_search_parameters() - }; + get_search_parameters()}; return lib::SaveTable( "multi_vamana_dynamic_auxiliary_parameters", @@ -684,21 +683,18 @@ struct MultiVamanaStateLoader { return MultiVamanaStateLoader{ SVS_LOAD_MEMBER_AT_(table, parameters), IDTranslator{}, - std::move(labels) - }; + std::move(labels)}; } case MultiMutableVamanaLoad::FROM_DYNAMIC: return MultiVamanaStateLoader{ SVS_LOAD_MEMBER_AT_(table, parameters), svs::lib::load_at(table, "translation"), - std::vector{} - }; + std::vector{}}; case MultiMutableVamanaLoad::FROM_STATIC: return MultiVamanaStateLoader{ lib::load(table), IDTranslator::Identity(assume_datasize), - std::vector{} - }; + std::vector{}}; default: throw ANNEXCEPTION("Invalid multi vamana load type"); } @@ -761,8 +757,7 @@ auto auto_multi_dynamic_assemble( 
std::move(distance), labels, std::move(threadpool), - std::move(logger) - }; + std::move(logger)}; } case MultiMutableVamanaLoad::FROM_DYNAMIC: case MultiMutableVamanaLoad::FROM_STATIC: { @@ -787,8 +782,7 @@ auto auto_multi_dynamic_assemble( std::move(distance), std::move(translator), std::move(threadpool), - std::move(logger) - }; + std::move(logger)}; } default: throw ANNEXCEPTION("Invalid multi vamana load type"); diff --git a/include/svs/index/vamana/search_params.h b/include/svs/index/vamana/search_params.h index 191928d1..ca01bade 100644 --- a/include/svs/index/vamana/search_params.h +++ b/include/svs/index/vamana/search_params.h @@ -105,8 +105,7 @@ struct VamanaSearchParameters { ), SVS_LOAD_MEMBER_AT_(table, search_buffer_visited_set), 4, - 1 - }; + 1}; } static VamanaSearchParameters load(const lib::ContextFreeLoadTable& table) { diff --git a/include/svs/index/vamana/vamana_build.h b/include/svs/index/vamana/vamana_build.h index 05a8ae21..77d5cead 100644 --- a/include/svs/index/vamana/vamana_build.h +++ b/include/svs/index/vamana/vamana_build.h @@ -125,8 +125,7 @@ template class BackedgeBuffer { BackedgeBuffer(size_t num_elements, size_t bucket_size) : BackedgeBuffer(BackedgeBufferParameters{ - bucket_size, lib::div_round_up(num_elements, bucket_size) - }) {} + bucket_size, lib::div_round_up(num_elements, bucket_size)}) {} // Add a point. void add_edge(Idx src, Idx dst) { @@ -541,8 +540,7 @@ class VamanaBuilder { i, distance::compute( general_distance, src_data, general_accessor(data_, i) - ) - }; + )}; }; candidates.clear(); diff --git a/include/svs/lib/algorithms.h b/include/svs/lib/algorithms.h index 3fff652b..07e71646 100644 --- a/include/svs/lib/algorithms.h +++ b/include/svs/lib/algorithms.h @@ -42,8 +42,7 @@ template struct MinMax { /// A static initializer to keep this class a simple aggregate. 
static MinMax init() { return MinMax{ - .min = std::numeric_limits::max(), .max = std::numeric_limits::lowest() - }; + .min = std::numeric_limits::max(), .max = std::numeric_limits::lowest()}; } void update(T x) { diff --git a/include/svs/lib/array.h b/include/svs/lib/array.h index eafcd99a..fef4b3f8 100644 --- a/include/svs/lib/array.h +++ b/include/svs/lib/array.h @@ -347,8 +347,8 @@ template > class D DenseArray(const DenseArray& other) : pointer_{nullptr} , dims_{other.dims_} - , allocator_{atraits::select_on_container_copy_construction(other.get_allocator()) - } { + , allocator_{ + atraits::select_on_container_copy_construction(other.get_allocator())} { size_t sz = other.size(); pointer_ = atraits::allocate(allocator_, sz); assign(other.begin(), other.end()); diff --git a/include/svs/lib/dispatcher.h b/include/svs/lib/dispatcher.h index 7253fd73..a158fa89 100644 --- a/include/svs/lib/dispatcher.h +++ b/include/svs/lib/dispatcher.h @@ -449,8 +449,7 @@ auto make_matcher( // Stateless lambda -> function pointer using unary "+". return +[](const std::remove_cvref_t&... args) -> return_type { return std::array{ - dispatch_match(args)... - }; + dispatch_match(args)...}; }; } diff --git a/include/svs/lib/invoke.h b/include/svs/lib/invoke.h index e17a274c..6c8be664 100644 --- a/include/svs/lib/invoke.h +++ b/include/svs/lib/invoke.h @@ -26,11 +26,11 @@ namespace func_ns { struct dispatcher { template requires requires(Tag&& tag, Args&&... args) { - svs_invoke(SVS_FWD(tag), SVS_FWD(args)...); - } + svs_invoke(SVS_FWD(tag), SVS_FWD(args)...); + } SVS_FORCE_INLINE constexpr auto operator()(Tag&& tag, Args&&... 
args) const - noexcept(noexcept(svs_invoke(SVS_FWD(tag), SVS_FWD(args)...)) - ) -> decltype(svs_invoke(SVS_FWD(tag), SVS_FWD(args)...)) { + noexcept(noexcept(svs_invoke(SVS_FWD(tag), SVS_FWD(args)...))) + -> decltype(svs_invoke(SVS_FWD(tag), SVS_FWD(args)...)) { return svs_invoke(SVS_FWD(tag), SVS_FWD(args)...); } }; @@ -42,8 +42,8 @@ inline constexpr func_ns::dispatcher svs_invoke = {}; template concept svs_invocable = requires(Tag&& tag, Args&&... args) { - svs::svs_invoke(SVS_FWD(tag), SVS_FWD(args)...); -}; + svs::svs_invoke(SVS_FWD(tag), SVS_FWD(args)...); + }; template using svs_invoke_result_t = std::invoke_result_t; diff --git a/include/svs/lib/prefetch.h b/include/svs/lib/prefetch.h index db27be26..0fd438ba 100644 --- a/include/svs/lib/prefetch.h +++ b/include/svs/lib/prefetch.h @@ -177,8 +177,7 @@ template Prefetcher, std::remove_cvref_t> make_prefetcher(PrefetchParameters parameters, size_t imax, Op&& op, Pred&& pred) { return Prefetcher, std::remove_cvref_t>{ - parameters, imax, SVS_FWD(op), SVS_FWD(pred) - }; + parameters, imax, SVS_FWD(op), SVS_FWD(pred)}; } } // namespace svs::lib diff --git a/include/svs/lib/saveload.h b/include/svs/lib/saveload.h index c530bc9d..da7c8fca 100644 --- a/include/svs/lib/saveload.h +++ b/include/svs/lib/saveload.h @@ -45,8 +45,10 @@ template bool test_self_save_load_context_free(const T& x) { // Expected Transformation: // SVS_LIST_SAVE_(x, args...) -> {"x", svs::lib::save(x_, args...)} -#define SVS_LIST_SAVE_(name, ...) \ - { #name, svs::lib::save(name##_, ##__VA_ARGS__) } +#define SVS_LIST_SAVE_(name, ...) \ + { \ +#name, svs::lib::save(name##_, ##__VA_ARGS__) \ + } // Expected Transformation: // SVS_INSERT_SAVE_(table, x, args...) @@ -64,8 +66,10 @@ template bool test_self_save_load_context_free(const T& x) { // Expected Transformation: // SVS_LIST_SAVE_(x, args...) -> {"x", svs::lib::save(x, args...)} -#define SVS_LIST_SAVE(name, ...) 
\ - { #name, svs::lib::save(name, ##__VA_ARGS__) } +#define SVS_LIST_SAVE(name, ...) \ + { \ +#name, svs::lib::save(name, ##__VA_ARGS__) \ + } // Expected Transformation: // SVS_INSERT_SAVE_(table, x, args...) diff --git a/include/svs/lib/saveload/load.h b/include/svs/lib/saveload/load.h index 4673a564..767e02af 100644 --- a/include/svs/lib/saveload/load.h +++ b/include/svs/lib/saveload/load.h @@ -656,8 +656,8 @@ template struct Loader { /// Only applicable if such a static member is defined with results convertible to /// ``bool``. template - requires detail::HasStaticDirectLoad - bool can_load_direct(const std::filesystem::path& path, const Args&... args) const { + requires detail::HasStaticDirectLoad bool + can_load_direct(const std::filesystem::path& path, const Args&... args) const { return T::can_load_direct(path, args...); } @@ -825,8 +825,7 @@ inline SerializedObject begin_deserialization(const std::filesystem::path& fullp auto version = get_version(table, config_version_key); svs::lib::detail::check_global_version(version, fullpath); return SerializedObject{ - std::move(table), lib::LoadContext{fullpath.parent_path(), version} - }; + std::move(table), lib::LoadContext{fullpath.parent_path(), version}}; } } // namespace detail diff --git a/include/svs/lib/saveload/save.h b/include/svs/lib/saveload/save.h index f59d6113..60f556e7 100644 --- a/include/svs/lib/saveload/save.h +++ b/include/svs/lib/saveload/save.h @@ -179,9 +179,9 @@ class SaveTable { namespace detail { template concept HasZeroArgSaveTo = requires(const T& x) { - { x.save() } -> std::same_as; -}; -} // namespace detail + { x.save() } -> std::same_as; + }; +} /// /// @brief Proxy object for an object ``x`` of type ``T``. 
diff --git a/include/svs/lib/threads/types.h b/include/svs/lib/threads/types.h index 7020cb50..1081d2cb 100644 --- a/include/svs/lib/threads/types.h +++ b/include/svs/lib/threads/types.h @@ -58,15 +58,16 @@ class ThreadingException : public std::runtime_error { /// template -concept PartitionableIterator = requires { - // `I` must be a random access iterator. - requires std::random_access_iterator; - - // Furthermore, the difference type must "play nicely" with integers. - requires std::convertible_to, size_t>; - requires std::convertible_to>; - requires std::convertible_to>; -}; +concept PartitionableIterator = + requires { + // `I` must be a random access iterator. + requires std::random_access_iterator; + + // Furthermore, the difference type must "play nicely" with integers. + requires std::convertible_to, size_t>; + requires std::convertible_to>; + requires std::convertible_to>; + }; template struct IteratorPair : std::pair { // type aliases diff --git a/include/svs/orchestrators/dynamic_flat.h b/include/svs/orchestrators/dynamic_flat.h index cc41e357..e06efb45 100644 --- a/include/svs/orchestrators/dynamic_flat.h +++ b/include/svs/orchestrators/dynamic_flat.h @@ -327,8 +327,7 @@ template DynamicFlat make_dynamic_flat(Args&&... args) { using Impl = decltype(index::flat::DynamicFlatIndex{std::forward(args)...}); return DynamicFlat{ - std::make_unique>(std::forward(args)...) 
- }; + std::make_unique>(std::forward(args)...)}; } } // namespace svs diff --git a/include/svs/orchestrators/dynamic_vamana.h b/include/svs/orchestrators/dynamic_vamana.h index ccb68f98..5d27a0d3 100644 --- a/include/svs/orchestrators/dynamic_vamana.h +++ b/include/svs/orchestrators/dynamic_vamana.h @@ -136,8 +136,8 @@ class DynamicVamana : public manager::IndexManager { explicit DynamicVamana( AssembleTag SVS_UNUSED(tag), QueryTypes SVS_UNUSED(type), Impl impl ) - : base_type{std::make_unique>(std::move(impl)) - } {} + : base_type{ + std::make_unique>(std::move(impl))} {} ///// Vamana Interface void experimental_reset_performance_parameters() { @@ -456,8 +456,7 @@ template DynamicVamana make_dynamic_vamana(Args&&... args) { using Impl = decltype(index::vamana::MutableVamanaIndex{std::forward(args)...}); return DynamicVamana{ - std::make_unique>(std::forward(args)...) - }; + std::make_unique>(std::forward(args)...)}; } } // namespace svs diff --git a/include/svs/orchestrators/inverted.h b/include/svs/orchestrators/inverted.h index de90d0ff..6b6e5047 100644 --- a/include/svs/orchestrators/inverted.h +++ b/include/svs/orchestrators/inverted.h @@ -136,8 +136,7 @@ class Inverted : public manager::IndexManager { std::move(strategy), std::move(centroid_picker), std::move(clustering_post_op) - ) - }; + )}; } ///// Assembling @@ -167,8 +166,7 @@ class Inverted : public manager::IndexManager { index_config, graph, std::move(threadpool_proto) - ) - }; + )}; } }; diff --git a/include/svs/orchestrators/vamana.h b/include/svs/orchestrators/vamana.h index 4359aa2f..6b698c4f 100644 --- a/include/svs/orchestrators/vamana.h +++ b/include/svs/orchestrators/vamana.h @@ -210,8 +210,7 @@ class VamanaImpl : public manager::ManagerImpl { return VamanaIterator{ impl(), std::span(svs::get(query), query.size(0)), - extra_search_buffer_capacity - }; + extra_search_buffer_capacity}; } ); } @@ -661,8 +660,8 @@ class Vamana : public manager::IndexManager { /// template Vamana 
make_vamana(Args&&... args) { using Impl = decltype(index::vamana::VamanaIndex{std::forward(args)...}); - return Vamana{std::make_unique>(std::forward(args)... - )}; + return Vamana{ + std::make_unique>(std::forward(args)...)}; } /// diff --git a/include/svs/quantization/scalar/scalar.h b/include/svs/quantization/scalar/scalar.h index 7ab56df6..7ddf1cb9 100644 --- a/include/svs/quantization/scalar/scalar.h +++ b/include/svs/quantization/scalar/scalar.h @@ -462,8 +462,7 @@ class SQDataset { auto compressed = compressor(data, threadpool, allocator); return SQDataset{ - std::move(compressed), scale, bias - }; + std::move(compressed), scale, bias}; } /// @brief Compact the dataset @@ -501,8 +500,7 @@ class SQDataset { return SQDataset{ SVS_LOAD_MEMBER_AT_(table, data, allocator), lib::load_at(table, "scale"), - lib::load_at(table, "bias") - }; + lib::load_at(table, "bias")}; } /// @brief Prefetch data in the dataset. diff --git a/tests/integration/vamana/scalar_iterator.cpp b/tests/integration/vamana/scalar_iterator.cpp index ac57ddba..027275a6 100644 --- a/tests/integration/vamana/scalar_iterator.cpp +++ b/tests/integration/vamana/scalar_iterator.cpp @@ -47,8 +47,7 @@ void check( CATCH_REQUIRE(index.size() > num_neighbors); auto p = svs::index::vamana::VamanaSearchParameters{ - {num_neighbors, num_neighbors}, false, 0, 0 - }; + {num_neighbors, num_neighbors}, false, 0, 0}; auto scratch = index.scratchspace(p); diff --git a/tests/svs/core/allocator.cpp b/tests/svs/core/allocator.cpp index 1f4c51d1..27191454 100644 --- a/tests/svs/core/allocator.cpp +++ b/tests/svs/core/allocator.cpp @@ -202,15 +202,14 @@ CATCH_TEST_CASE("Testing Allocator", "[allocators]") { } CATCH_SECTION("Rebind") { auto alloc = svs::make_allocator_handle(svs::lib::Allocator()); - svs::lib::rebind_allocator_t rebound_alloc{alloc - }; + svs::lib::rebind_allocator_t rebound_alloc{ + alloc}; auto* ptr = rebound_alloc.allocate(num_elements); rebound_alloc.deallocate(ptr, num_elements); 
CATCH_STATIC_REQUIRE(std::is_same_v); svs::lib::rebind_allocator_t rebound_alloc2{ - rebound_alloc - }; + rebound_alloc}; auto* ptr2 = rebound_alloc2.allocate(num_elements); rebound_alloc2.deallocate(ptr2, num_elements); CATCH_STATIC_REQUIRE(std::is_same_v); diff --git a/tests/svs/core/data/block.cpp b/tests/svs/core/data/block.cpp index bcb88524..4923b30f 100644 --- a/tests/svs/core/data/block.cpp +++ b/tests/svs/core/data/block.cpp @@ -73,8 +73,7 @@ template void test_blocked() { size_t expected_blocksize = 128; auto parameters = svs::data::BlockingParameters{ - .blocksize_bytes = svs::lib::prevpow2(blocksize_bytes) - }; + .blocksize_bytes = svs::lib::prevpow2(blocksize_bytes)}; auto allocator = svs::data::Blocked>(parameters); auto data = svs::data::BlockedData(num_elements, dimensions, allocator); CATCH_REQUIRE(is_blocked(data)); diff --git a/tests/svs/core/logging.cpp b/tests/svs/core/logging.cpp index c989828c..c4ea3338 100644 --- a/tests/svs/core/logging.cpp +++ b/tests/svs/core/logging.cpp @@ -43,8 +43,7 @@ CATCH_TEST_CASE("Logging", "[core][logging]") { CATCH_STATIC_REQUIRE( svs::logging::all_levels == std::array{ - Trace, Debug, Info, Warn, Error, Critical, Off - } + Trace, Debug, Info, Warn, Error, Critical, Off} ); // SVS to spdlog diff --git a/tests/svs/index/flat/dynamic_flat.cpp b/tests/svs/index/flat/dynamic_flat.cpp index b8cd4775..f9f99e70 100644 --- a/tests/svs/index/flat/dynamic_flat.cpp +++ b/tests/svs/index/flat/dynamic_flat.cpp @@ -91,8 +91,7 @@ void do_check( index.search( results.view(), svs::data::ConstSimpleDataView{ - queries.data(), queries.size(), queries.dimensions() - }, + queries.data(), queries.size(), queries.dimensions()}, search_parameters ); double search_time = svs::lib::time_difference(tic); @@ -104,9 +103,9 @@ void do_check( // compute recall double recall = svs::k_recall_at_n(gt, results, NUM_NEIGHBORS, NUM_NEIGHBORS); - std::cout << "[" << message << "] -- {" << "operation: " << operation_time - << ", groundtruth: " << 
groundtruth_time << ", search: " << search_time - << ", recall: " << recall << "}\n"; + std::cout << "[" << message << "] -- {" + << "operation: " << operation_time << ", groundtruth: " << groundtruth_time + << ", search: " << search_time << ", recall: " << recall << "}\n"; } template diff --git a/tests/svs/index/inverted/clustering.cpp b/tests/svs/index/inverted/clustering.cpp index 9508eaa8..29844f61 100644 --- a/tests/svs/index/inverted/clustering.cpp +++ b/tests/svs/index/inverted/clustering.cpp @@ -288,8 +288,7 @@ void test_end_to_end_clustering( }); auto vamana_parameters = svs::index::vamana::VamanaBuildParameters{ - construction_alpha, 64, 200, 1000, 60, true - }; + construction_alpha, 64, 200, 1000, 60, true}; // Build the index once and reuse it multiple times to help speed up tests. for (size_t max_replicas : {2, 8}) { diff --git a/tests/svs/index/vamana/dynamic_index.cpp b/tests/svs/index/vamana/dynamic_index.cpp index baf21ad5..17725887 100644 --- a/tests/svs/index/vamana/dynamic_index.cpp +++ b/tests/svs/index/vamana/dynamic_index.cpp @@ -40,8 +40,7 @@ namespace { template auto copy_dataset(const T& data) { auto copy = svs::data::SimplePolymorphicData{ - data.size(), data.dimensions() - }; + data.size(), data.dimensions()}; for (size_t i = 0; i < data.size(); ++i) { copy.set_datum(i, data.get_datum(i)); } @@ -117,8 +116,7 @@ CATCH_TEST_CASE("MutableVamanaIndex", "[graph_index]") { entry_point, svs::distance::DistanceL2(), svs::threads::UnitRange(0, base_data.size()), - num_threads - }; + num_threads}; check_equal(base_data, index); index.debug_check_graph_consistency(false); diff --git a/tests/svs/index/vamana/dynamic_index_2.cpp b/tests/svs/index/vamana/dynamic_index_2.cpp index b79859c3..e590ae54 100644 --- a/tests/svs/index/vamana/dynamic_index_2.cpp +++ b/tests/svs/index/vamana/dynamic_index_2.cpp @@ -134,7 +134,8 @@ struct Report { }; std::ostream& operator<<(std::ostream& stream, const Report& report) { - stream << "[" << report.message_ << "] 
-- {" << "operation: " << report.operation_time_ + stream << "[" << report.message_ << "] -- {" + << "operation: " << report.operation_time_ << ", groundtruth: " << report.groundtruth_time_ << ", search: " << report.search_time_ << ", recall: " << report.recall_ << "}"; return stream; @@ -325,8 +326,7 @@ CATCH_TEST_CASE("Testing Graph Index", "[graph_index][dynamic_index]") { } svs::index::vamana::VamanaBuildParameters parameters{ - 1.2, max_degree, 2 * max_degree, 1000, max_degree - 4, true - }; + 1.2, max_degree, 2 * max_degree, 1000, max_degree - 4, true}; auto tic = svs::lib::now(); auto index = svs::index::vamana::MutableVamanaIndex( diff --git a/tests/svs/index/vamana/index.cpp b/tests/svs/index/vamana/index.cpp index bcec23dd..464b1234 100644 --- a/tests/svs/index/vamana/index.cpp +++ b/tests/svs/index/vamana/index.cpp @@ -120,8 +120,7 @@ CATCH_TEST_CASE("Vamana Index Parameters", "[index][vamana]") { CATCH_SECTION("Current version") { auto p = VamanaIndexParameters{ - 128, {12.4f, 478, 13, 4, 10, false}, {{10, 20}, true, 1, 1} - }; + 128, {12.4f, 478, 13, 4, 10, false}, {{10, 20}, true, 1, 1}}; CATCH_REQUIRE(svs::lib::test_self_save_load_context_free(p)); } } diff --git a/tests/svs/index/vamana/iterator.cpp b/tests/svs/index/vamana/iterator.cpp index 4ba19167..2b8d5e52 100644 --- a/tests/svs/index/vamana/iterator.cpp +++ b/tests/svs/index/vamana/iterator.cpp @@ -78,8 +78,7 @@ void check( CATCH_REQUIRE(index.size() > num_neighbors); auto p = svs::index::vamana::VamanaSearchParameters{ - {num_neighbors, num_neighbors}, false, 0, 0 - }; + {num_neighbors, num_neighbors}, false, 0, 0}; auto scratch = index.scratchspace(p); diff --git a/tests/svs/index/vamana/iterator_schedule.cpp b/tests/svs/index/vamana/iterator_schedule.cpp index 2bddd95f..74a0d3b2 100644 --- a/tests/svs/index/vamana/iterator_schedule.cpp +++ b/tests/svs/index/vamana/iterator_schedule.cpp @@ -189,8 +189,7 @@ CATCH_TEST_CASE("Iterator Schedules", "[vamana][index][iterator][iterator_schedu // 
Construct using `std::in_place_type`. // Also test the move-assignment operator while we're at it. abstract = svs::index::vamana::AbstractIteratorSchedule{ - std::in_place_type, base, size_t{10} - }; + std::in_place_type, base, size_t{10}}; test_default(abstract, 10); diff --git a/tests/svs/index/vamana/multi.cpp b/tests/svs/index/vamana/multi.cpp index 8989811b..af52864f 100644 --- a/tests/svs/index/vamana/multi.cpp +++ b/tests/svs/index/vamana/multi.cpp @@ -67,8 +67,7 @@ CATCH_TEMPLATE_TEST_CASE( const auto groundtruth = test_dataset::load_groundtruth(svs::distance_type_v); const svs::index::vamana::VamanaBuildParameters build_parameters{ - alpha, max_degree, 2 * max_degree, 1000, max_degree - 4, true - }; + alpha, max_degree, 2 * max_degree, 1000, max_degree - 4, true}; const auto search_parameters = svs::index::vamana::VamanaSearchParameters(); diff --git a/tests/svs/index/vamana/search_buffer.cpp b/tests/svs/index/vamana/search_buffer.cpp index 824f96c1..8ecb698a 100644 --- a/tests/svs/index/vamana/search_buffer.cpp +++ b/tests/svs/index/vamana/search_buffer.cpp @@ -596,8 +596,7 @@ CATCH_TEST_CASE("Fuzzing", "[core][search_buffer]") { auto run_test = [&](Cmp SVS_UNUSED(cmp)) { auto setup = FuzzSetup{num_trials, dataset_size, 32, 32, seed, allow_invalid}; auto buffer = svs::index::vamana::SearchBuffer{ - svs::index::vamana::SearchBufferConfig{32, 32} - }; + svs::index::vamana::SearchBufferConfig{32, 32}}; fuzz_test(buffer, setup); // Change size; @@ -918,8 +917,7 @@ CATCH_TEST_CASE("Fuzzing Mutable", "[core][search_buffer]") { auto run_test = [&](Cmp SVS_UNUSED(cmp)) { auto setup = FuzzSetup{num_trials, dataset_size, 32, 32, seed, allow_invalid}; auto buffer = svs::index::vamana::MutableBuffer{ - svs::index::vamana::SearchBufferConfig{32, 32} - }; + svs::index::vamana::SearchBufferConfig{32, 32}}; fuzz_test(buffer, setup); // Change size; diff --git a/tests/svs/lib/array.cpp b/tests/svs/lib/array.cpp index 3fe504a4..4c4eb06c 100644 --- a/tests/svs/lib/array.cpp 
+++ b/tests/svs/lib/array.cpp @@ -385,8 +385,7 @@ make_source_array(const Dims& dims, size_t n_elements, size_t id, Bools check_equal( alloc, std::array{ - id, 1, sizeof(T) * n_elements, 0, 0, n_elements, 0, 0, 0, 1, 0, 0, 0, 0 - } + id, 1, sizeof(T) * n_elements, 0, 0, n_elements, 0, 0, 0, 1, 0, 0, 0, 0} ); // Assign the contents based on whether the id is A or B. diff --git a/tests/svs/lib/dispatcher.cpp b/tests/svs/lib/dispatcher.cpp index a3f872e0..ea4f8b97 100644 --- a/tests/svs/lib/dispatcher.cpp +++ b/tests/svs/lib/dispatcher.cpp @@ -290,8 +290,7 @@ CATCH_TEST_CASE("Dispatcher2", "[lib][dispatcher2]") { // Passed by mutable reference - ensure we can mutate our argument // and have this mutation be visible to the caller. - if constexpr (is_mutable_reference_v && - is_mutable_reference_v) { + if constexpr (is_mutable_reference_v && is_mutable_reference_v) { arg.push_back(4); } }; @@ -341,8 +340,7 @@ CATCH_TEST_CASE("Dispatcher2", "[lib][dispatcher2]") { // Passed by mutable reference - ensure we can mutate our argument // and have this mutation be visible to the caller. 
- if constexpr (is_mutable_reference_v && - is_mutable_reference_v) { + if constexpr (is_mutable_reference_v && is_mutable_reference_v) { arg.value_ = 5; } }; diff --git a/tests/svs/lib/meta.cpp b/tests/svs/lib/meta.cpp index 3e4a75af..c27fdd9c 100644 --- a/tests/svs/lib/meta.cpp +++ b/tests/svs/lib/meta.cpp @@ -52,8 +52,7 @@ CATCH_TEST_CASE("Meta", "[lib][meta]") { CATCH_STATIC_REQUIRE( values == std::array{ - svs::DataType::float32, svs::DataType::uint8, svs::DataType::int64 - } + svs::DataType::float32, svs::DataType::uint8, svs::DataType::int64} ); } diff --git a/tests/svs/lib/saveload.cpp b/tests/svs/lib/saveload.cpp index bb2a690f..eb731e1e 100644 --- a/tests/svs/lib/saveload.cpp +++ b/tests/svs/lib/saveload.cpp @@ -376,8 +376,7 @@ struct BuiltIn { SVS_LOAD_MEMBER_AT_(table, bool), SVS_LOAD_MEMBER_AT_(table, str), SVS_LOAD_MEMBER_AT_(table, path), - SVS_LOAD_MEMBER_AT_(table, v) - }; + SVS_LOAD_MEMBER_AT_(table, v)}; } static svs::lib::TryLoadResult diff --git a/tests/svs/lib/threads/thread.cpp b/tests/svs/lib/threads/thread.cpp index bb774f3f..fb4ff371 100644 --- a/tests/svs/lib/threads/thread.cpp +++ b/tests/svs/lib/threads/thread.cpp @@ -455,8 +455,7 @@ CATCH_TEST_CASE("Control Block", "[core][threads][thread_control_block]") { CATCH_SECTION("Shutdown or Exception") { auto graceful_states = std::vector{ - svs::threads::ThreadState::Shutdown, svs::threads::ThreadState::Exception - }; + svs::threads::ThreadState::Shutdown, svs::threads::ThreadState::Exception}; for (auto state : graceful_states) { block.set_state(state); diff --git a/tests/svs/lib/threads/threadpool.cpp b/tests/svs/lib/threads/threadpool.cpp index ec589719..575d986e 100644 --- a/tests/svs/lib/threads/threadpool.cpp +++ b/tests/svs/lib/threads/threadpool.cpp @@ -117,9 +117,8 @@ CATCH_TEST_CASE("Thread Pool", "[core][threads][threadpool]") { [&](const auto& range, uint64_t tid) { std::lock_guard lock{mutex}; seen_threads.push_back(tid); - ranges.push_back( - 
threads::UnitRange{*(range.begin()), *(range.end())} - ); + ranges.push_back(threads::UnitRange{ + *(range.begin()), *(range.end())}); } ); } diff --git a/tests/utils/schemas.cpp b/tests/utils/schemas.cpp index dfdbf7fa..4434c88c 100644 --- a/tests/utils/schemas.cpp +++ b/tests/utils/schemas.cpp @@ -48,14 +48,12 @@ std::filesystem::path test_vtest_file() { // Expected contents for vtest std::vector> vtest_contents() { return std::vector>{ - {1.0, 2.0, 3.0, 4.0, 5.0}, {6.0, 7.0, 8.0, 9.0, 10.0} - }; + {1.0, 2.0, 3.0, 4.0, 5.0}, {6.0, 7.0, 8.0, 9.0, 10.0}}; } // Expected contents for v1 std::vector> v1_contents() { return std::vector>{ - {101.0, 102.0, 103.0, 104.0, 105.0}, {106.0, 107.0, 108.0, 109.0, 110.0} - }; + {101.0, 102.0, 103.0, 104.0, 105.0}, {106.0, 107.0, 108.0, 109.0, 110.0}}; } } // namespace test_schemas diff --git a/tests/utils/test_dataset.cpp b/tests/utils/test_dataset.cpp index d25dbb15..d1ff3aba 100644 --- a/tests/utils/test_dataset.cpp +++ b/tests/utils/test_dataset.cpp @@ -109,8 +109,7 @@ svs::graphs::SimpleBlockedGraph graph_blocked() { std::vector expected_out_neighbors() { return std::vector{ - 64, 103, 118, 45, 34, 31, 64, 121, 128, 128, 128, 128, 46, 71, 115, 112 - }; + 64, 103, 118, 45, 34, 31, 64, 121, 128, 128, 128, 128, 46, 71, 115, 112}; } // Helper to load the ground-truth for a given file. 
diff --git a/utils/assemble_vamana.cpp b/utils/assemble_vamana.cpp index 48c1798a..9190cf4d 100644 --- a/utils/assemble_vamana.cpp +++ b/utils/assemble_vamana.cpp @@ -60,8 +60,7 @@ void convert( std::move(data), svs::lib::narrow(entry_point), distance, - 1 - }; + 1}; index.set_alpha(alpha); index.set_construction_window_size(construction_window_size); diff --git a/utils/benchmarks/index_build.cpp b/utils/benchmarks/index_build.cpp index 3a572e43..a7a0af06 100644 --- a/utils/benchmarks/index_build.cpp +++ b/utils/benchmarks/index_build.cpp @@ -83,8 +83,7 @@ std::vector benchmark( build_setup.construction_window_size, 1000, build_setup.max_degree, - true - }; + true}; auto build_time = timer.push_back("index build"); auto index = svs::index::vamana::auto_build( diff --git a/utils/build_index.cpp b/utils/build_index.cpp index 05c044b1..4bc7af4b 100644 --- a/utils/build_index.cpp +++ b/utils/build_index.cpp @@ -59,8 +59,7 @@ void build_index( build_search_window_size, max_candidate_pool_size, max_degree, - true - }; + true}; auto index = svs::Vamana::build( parameters, svs::VectorDataLoader(vecs_filename), dist_type, n_threads @@ -143,8 +142,7 @@ int svs_main(std::vector args) { {"int8", build_index}, {"uint8", build_index}, {"float", build_index}, - {"float16", build_index} - }; + {"float16", build_index}}; auto it = dispatcher.find(data_type); if (it == dispatcher.end()) { diff --git a/utils/characterization/mutable.cpp b/utils/characterization/mutable.cpp index e26f71fb..7dd1fdab 100644 --- a/utils/characterization/mutable.cpp +++ b/utils/characterization/mutable.cpp @@ -122,7 +122,8 @@ struct Report { }; std::ostream& operator<<(std::ostream& stream, const Report& report) { - stream << "[" << report.message_ << "] -- {" << "operation: " << report.operation_time_ + stream << "[" << report.message_ << "] -- {" + << "operation: " << report.operation_time_ << ", groundtruth: " << report.groundtruth_time_ << ", search: " << report.search_time_ << ", recall: " << 
report.recall_ << "}"; return stream; @@ -278,8 +279,7 @@ int svs_main(std::vector args) { } svs::index::vamana::VamanaBuildParameters parameters{ - ALPHA, max_degree, 2 * max_degree, 1000, max_degree, true - }; + ALPHA, max_degree, 2 * max_degree, 1000, max_degree, true}; auto tic = svs::lib::now(); auto index = svs::index::vamana::MutableVamanaIndex( diff --git a/utils/convert_data_to_bfloat16.cpp b/utils/convert_data_to_bfloat16.cpp index d042880c..34e1d5ac 100644 --- a/utils/convert_data_to_bfloat16.cpp +++ b/utils/convert_data_to_bfloat16.cpp @@ -55,8 +55,7 @@ int svs_main(std::vector args) { std::cout << "Converting Bin data!" << std::endl; auto reader = svs::io::binary::BinaryReader{filename_f32}; auto writer = svs::io::binary::BinaryWriter{ - filename_bf16, reader.nvectors(), reader.ndims() - }; + filename_bf16, reader.nvectors(), reader.ndims()}; for (auto i : reader) { writer << i; } diff --git a/utils/convert_data_to_float16.cpp b/utils/convert_data_to_float16.cpp index 062c0106..96483203 100644 --- a/utils/convert_data_to_float16.cpp +++ b/utils/convert_data_to_float16.cpp @@ -54,8 +54,7 @@ int svs_main(std::vector args) { std::cout << "Converting Bin data!" 
<< std::endl; auto reader = svs::io::binary::BinaryReader{filename_f32}; auto writer = svs::io::binary::BinaryWriter{ - filename_f16, reader.nvectors(), reader.ndims() - }; + filename_f16, reader.nvectors(), reader.ndims()}; for (auto i : reader) { writer << i; } diff --git a/utils/search_index.cpp b/utils/search_index.cpp index ae9cd4c9..08bc7314 100644 --- a/utils/search_index.cpp +++ b/utils/search_index.cpp @@ -159,8 +159,7 @@ int svs_main(std::vector&& args) { {{"float", "int8"}, search_index}, {{"float", "uint8"}, search_index}, {{"float", "float"}, search_index}, - {{"float", "float16"}, search_index} - }; + {{"float", "float16"}, search_index}}; auto it = dispatcher.find({query_data_type, db_data_type}); if (it == dispatcher.end()) { diff --git a/utils/search_index_numa.cpp b/utils/search_index_numa.cpp index 3103216e..46653f1c 100644 --- a/utils/search_index_numa.cpp +++ b/utils/search_index_numa.cpp @@ -188,8 +188,7 @@ int svs_main(std::vector args) { {{"float", "int8"}, search_index_numa}, {{"float", "uint8"}, search_index_numa}, {{"float", "float"}, search_index_numa}, - {{"float", "float16"}, search_index_numa} - }; + {{"float", "float16"}, search_index_numa}}; auto it = dispatcher.find({query_data_type, db_data_type}); if (it == dispatcher.end()) { @@ -206,12 +205,10 @@ int svs_main(std::vector args) { std::vector index_filenames{index_filename, index_filename}; std::vector graph_memory_styles{ - graph_memory_style_0, graph_memory_style_1 - }; + graph_memory_style_0, graph_memory_style_1}; std::vector graph_filenames{graph_filename_0, graph_filename_1}; std::vector data_memory_styles{ - data_memory_style, data_memory_style - }; + data_memory_style, data_memory_style}; std::vector data_filenames{data_filename, data_filename}; f(query_filename, diff --git a/utils/search_ivf.cpp b/utils/search_ivf.cpp index bef6e505..f1c42d4c 100644 --- a/utils/search_ivf.cpp +++ b/utils/search_ivf.cpp @@ -64,8 +64,7 @@ auto batch_queries( std::vector> query_batch; for 
(size_t batch = 0; batch < num_batches; ++batch) { auto this_batch = svs::threads::UnitRange{ - batch * batchsize, std::min((batch + 1) * batchsize, query_data.size()) - }; + batch * batchsize, std::min((batch + 1) * batchsize, query_data.size())}; query_batch.push_back( svs::data::SimpleData(this_batch.size(), query_data.dimensions()) ); @@ -195,8 +194,7 @@ int svs_main(std::vector&& args) { const auto dispatcher = std::map{ {{"float", "float16"}, search_index}, {{"float", "bfloat16"}, search_index}, - {{"float", "float"}, search_index} - }; + {{"float", "float"}, search_index}}; auto it = dispatcher.find({query_data_type, db_data_type}); if (it == dispatcher.end()) { From bd187f6986778459b0fbacdb8011e8a89d9da240 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 23 Oct 2025 10:42:39 +0000 Subject: [PATCH 5/7] Address review feedback on copilot-instructions.md Co-authored-by: ahuber21 <9201869+ahuber21@users.noreply.github.com> --- .github/copilot-instructions.md | 45 ++++++++++++++++++--------------- 1 file changed, 25 insertions(+), 20 deletions(-) diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md index ba605003..6b4c14d2 100644 --- a/.github/copilot-instructions.md +++ b/.github/copilot-instructions.md @@ -36,7 +36,6 @@ cmake -DCMAKE_BUILD_TYPE=RelWithDebugInfo \ -DSVS_BUILD_BINARIES=YES \ -DSVS_BUILD_TESTS=YES \ -DSVS_BUILD_EXAMPLES=YES \ - -DSVS_EXPERIMENTAL_LEANVEC=YES \ -DSVS_NO_AVX512=NO \ -DSVS_EXPERIMENTAL_ENABLE_IVF=OFF \ .. @@ -76,8 +75,6 @@ source /opt/intel/oneapi/setvars.sh | `SVS_EXPERIMENTAL_ENABLE_IVF` | OFF | Enable IVF (requires MKL) | | `CMAKE_BUILD_TYPE` | Release | Use `RelWithDebugInfo` for testing | -**Note**: The option `SVS_EXPERIMENTAL_LEANVEC` is recognized but not used internally (safe to set). 
- ## Code Formatting and Linting ### Formatting (ALWAYS run before committing) @@ -204,9 +201,9 @@ ScalableVectorSearch/ │ │ ├── vamana/ # Vamana graph index │ │ ├── flat/ # Flat (brute-force) index │ │ └── inverted/ # Inverted index (IVF) -│ ├── orchestrators/ # High-level APIs +│ ├── orchestrators/ # High-level APIs with type-erasure for simple interfaces │ ├── quantization/ # Vector quantization -│ └── extensions/ # ISA-specific optimizations +│ └── extensions/ # svs_invoke overloads to hook into core SVS routines ├── tests/ # ** C++ TEST SUITE ** │ ├── svs/ # Unit tests (mirrors include/svs/) │ ├── integration/ # Integration tests @@ -244,22 +241,19 @@ Main checks that run on every PR: 2. **pre-commit.yml**: Verifies code formatting with clang-format 15 3. **cibuildwheel.yml**: Builds Python wheels (uses custom manylinux2014 container) -**To replicate CI locally**: Use the exact cmake command from `build-linux.yml` (lines 70-77). +**To replicate CI locally**: Use the exact cmake command from `build-linux.yml` configuration step. ## Common Issues and Workarounds ### Build Issues -1. **Problem**: CMake configuration warns about unused `SVS_EXPERIMENTAL_LEANVEC` variable - - **Solution**: This is expected and harmless - the variable is accepted but not used - -2. **Problem**: Build fails with uninitialized variable warnings on GCC 12+ - - **Solution**: Already handled - GCC 12+ adds `-Wno-uninitialized` automatically (cmake/options.cmake:208) +1. **Problem**: Build fails with uninitialized variable warnings on GCC 12+ + - **Solution**: Already handled - GCC 12+ adds `-Wno-uninitialized` automatically in cmake/options.cmake -3. **Problem**: IVF tests fail or IVF won't build +2. **Problem**: IVF tests fail or IVF won't build - **Solution**: IVF requires Intel MKL - either install MKL or use `-DSVS_EXPERIMENTAL_ENABLE_IVF=OFF` -4. **Problem**: Tests timeout or take very long +3. 
**Problem**: Tests timeout or take very long - **Solution**: Integration tests can take 1-2 minutes; use specific test filters for faster iteration ### Formatting Issues @@ -289,7 +283,15 @@ cd build/tests && ./tests "[integration]" cd build/tests && ./tests --list-tags # Clean and rebuild -rm -rf build && mkdir build && cd build && cmake .. && make -j$(nproc) +rm -rf build && mkdir build && cd build +cmake -DCMAKE_BUILD_TYPE=RelWithDebugInfo \ + -DSVS_BUILD_BINARIES=YES \ + -DSVS_BUILD_TESTS=YES \ + -DSVS_BUILD_EXAMPLES=YES \ + -DSVS_NO_AVX512=NO \ + -DSVS_EXPERIMENTAL_ENABLE_IVF=OFF \ + .. +make -j$(nproc) ``` ## Important Notes for Coding Agents @@ -297,13 +299,16 @@ rm -rf build && mkdir build && cd build && cmake .. && make -j$(nproc) 1. **Trust these instructions first** - Only search the repository if information here is incomplete or incorrect 2. **Always build out-of-source** - Use a `build/` directory, never configure CMake in the repository root 3. **Follow the CI configuration** - Use the same cmake flags as `.github/workflows/build-linux.yml` for consistency -4. **Format before committing** - Run `./tools/clang-format.sh clang-format` to avoid CI failures +4. **Format before committing** - Run `./tools/clang-format.sh clang-format` to avoid CI failures. **IMPORTANT**: Only format files you modify; do not include formatting changes from other files in your PR 5. **Test early and often** - Build times are reasonable (~5-10 min), so test incrementally -6. **Header-only library** - Most code is in `include/svs/`, changes don't require recompiling everything -7. **ISA dispatching** - Runtime dispatch means the same binary runs on different CPU architectures -8. **Test filters are your friend** - Use Catch2 tags to run subsets of tests during development -9. **Python bindings are specialized** - Changes to template parameters may require Python binding updates -10. 
**Version is synchronized** - Keep version in sync across `CMakeLists.txt` (line 26), `setup.py` (line 43), and test files +6. **Tests are required** - New features and bugfixes must be accompanied by tests. For bugs, first reproduce the issue in a unit test, then fix it in the code +7. **Header-only library** - Most code is in `include/svs/`, changes don't require recompiling everything +8. **ISA dispatching** - Runtime dispatch means the same binary runs on different CPU architectures +9. **Type erasure** - Orchestrators use type-erasure to provide simple and consistent interfaces across different implementations +10. **Extensions system** - The `extensions/` directory provides `svs_invoke` overloads/specializations to hook into core SVS routines (similar to `std::invoke`) +11. **Test filters are your friend** - Use Catch2 tags to run subsets of tests during development +12. **Python bindings are specialized** - Changes to template parameters may require Python binding updates +13. **Version is synchronized** - Keep version in sync across `CMakeLists.txt`, `setup.py`, and test files ## Additional Resources From d4f1061fe71f9f83be61556a413c0a620b851069 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 23 Oct 2025 11:00:33 +0000 Subject: [PATCH 6/7] Enhance copilot-instructions.md with deeper architectural understanding Co-authored-by: ahuber21 <9201869+ahuber21@users.noreply.github.com> --- .github/copilot-instructions.md | 65 ++++++++++++++++++--------------- 1 file changed, 36 insertions(+), 29 deletions(-) diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md index 6b4c14d2..ddced18f 100644 --- a/.github/copilot-instructions.md +++ b/.github/copilot-instructions.md @@ -2,17 +2,24 @@ ## Repository Overview -**Scalable Vector Search (SVS)** is a high-performance C++20 library for vector similarity search, optimized for Intel x86 architectures but portable to other platforms. 
The library implements state-of-the-art Vamana graph-based approximate nearest neighbor (ANN) search and supports billions of high-dimensional vectors with high accuracy and speed. SVS features: +**Scalable Vector Search (SVS)** is a high-performance C++20 library for vector similarity search, optimized for Intel x86 architectures but portable to other platforms. The library implements state-of-the-art Vamana graph-based approximate nearest neighbor (ANN) search and supports billions of high-dimensional vectors with high accuracy and speed. -- **Core language**: C++20 with modern concepts for optimal compiler optimizations -- **Can be used as**: Header-only library or with Python bindings -- **Runtime ISA dispatching**: Automatically uses best available instruction set (SSE, AVX2, AVX512) -- **Python bindings**: Require shape-specialized templates for different data dimensionalities -- **Key algorithms**: Vamana graph-based search, LVQ/LeanVec compression (proprietary, available via shared libraries) +**Architecture**: The library uses a layered design: +- **Low-level index implementations** (`include/svs/index/`) provide templated, performance-critical algorithms (Vamana, Flat, IVF) +- **Orchestrators** (`include/svs/orchestrators/`) wrap indices with type-erased interfaces, hiding template complexity for simpler APIs +- **Extensions** (`include/svs/extensions/`) use customization point objects (`svs_invoke`) to specialize behavior for different data types + +**Key features**: +- **Core language**: C++20 with modern concepts enabling compile-time optimizations and type safety +- **Deployment options**: Header-only library for integration, or pre-built Python bindings via PyPI +- **Multi-architecture support**: Runtime ISA dispatching selects optimal SIMD instructions (SSE, AVX2, AVX512) at load time +- **Compression**: LVQ/LeanVec proprietary compression (closed-source, available via shared libraries) reduces memory footprint +- **Python bindings**: Template
specialization for common dimensionalities enables efficient Python API without sacrificing performance **Repository size**: Medium (~10k LOC core library, extensive tests and examples) **Build system**: CMake 3.21+ with C++20 compiler (GCC 11+, Clang 15+) -**Test framework**: Catch2 v3.4.0 (unit tests), ctest (integration tests) +**Test framework**: Catch2 v3.4.0 (unit tests with `CATCH_` prefixed macros), ctest (integration tests) +**Performance focus**: The library uses extensive compile-time dispatch and template metaprogramming to generate optimized code paths for different data types and CPU architectures, enabling near-optimal performance without runtime overhead ## Critical Build Instructions @@ -180,7 +187,7 @@ ScalableVectorSearch/ │ └── pyproject.toml # Build configuration ├── cmake/ # CMake modules │ ├── options.cmake # ** BUILD OPTIONS (IMPORTANT) ** -│ ├── multi-arch.cmake # Multi-architecture support +│ ├── multi-arch.cmake # Multi-architecture support (SSE, AVX2, AVX512) │ └── *.cmake # Dependency configs (eve, fmt, spdlog, etc.) 
├── data/ # Test data and schemas │ ├── test_dataset/ # Small test datasets @@ -188,27 +195,27 @@ ScalableVectorSearch/ ├── docker/ # Docker build environments ├── examples/ │ ├── cpp/ # C++ usage examples -│ │ ├── vamana.cpp # Main search example -│ │ ├── types.cpp # Supported types -│ │ ├── saveload.cpp # Save/load patterns -│ │ ├── dispatcher.cpp # Compile-time dispatch -│ │ └── shared/ # LVQ/LeanVec via shared library +│ │ ├── vamana.cpp # Basic search workflow (build, search, recall) +│ │ ├── types.cpp # Supported data types demonstration +│ │ ├── saveload.cpp # Index serialization/deserialization +│ │ ├── dispatcher.cpp # Compile-time type dispatch patterns +│ │ └── shared/ # Using LVQ/LeanVec via shared library │ └── python/ # Python examples ├── include/svs/ # ** CORE LIBRARY HEADERS ** │ ├── lib/ # Foundation: arrays, threads, I/O, SIMD │ ├── core/ # Core: distance, data structures, allocators │ ├── index/ # Index implementations -│ │ ├── vamana/ # Vamana graph index +│ │ ├── vamana/ # Vamana graph index (templated implementation) │ │ ├── flat/ # Flat (brute-force) index │ │ └── inverted/ # Inverted index (IVF) -│ ├── orchestrators/ # High-level APIs with type-erasure for simple interfaces -│ ├── quantization/ # Vector quantization -│ └── extensions/ # svs_invoke overloads to hook into core SVS routines +│ ├── orchestrators/ # High-level type-erased APIs wrapping indices for simpler use +│ ├── quantization/ # Vector quantization (scalar quantization implementations) +│ └── extensions/ # Customization points via svs_invoke for type-specific behavior ├── tests/ # ** C++ TEST SUITE ** -│ ├── svs/ # Unit tests (mirrors include/svs/) -│ ├── integration/ # Integration tests -│ ├── benchmark/ # Benchmark tests -│ └── utils/ # Test utilities +│ ├── svs/ # Unit tests (mirrors include/svs/ structure) +│ ├── integration/ # End-to-end integration tests +│ ├── benchmark/ # Benchmark framework tests +│ └── utils/ # Test utilities and reference implementations ├── tools/ 
│ ├── clang-format.sh # ** FORMATTING SCRIPT (USE THIS) ** │ └── benchmark_inputs/ # Benchmark configurations @@ -237,9 +244,9 @@ ScalableVectorSearch/ Main checks that run on every PR: -1. **build-linux.yml**: Builds with multiple compilers (g++-11, g++-12, clang++-15) in `RelWithDebugInfo` mode, runs all C++ tests and examples -2. **pre-commit.yml**: Verifies code formatting with clang-format 15 -3. **cibuildwheel.yml**: Builds Python wheels (uses custom manylinux2014 container) +1. **build-linux.yml**: Matrix build with multiple compilers (g++-11, g++-12, clang++-15) in `RelWithDebugInfo` mode. Tests both with and without IVF (Intel MKL). Runs full test suite and C++ examples (~5-10 min per configuration) +2. **pre-commit.yml**: Verifies code formatting with clang-format 15. Fails if any file doesn't match formatting standards +3. **cibuildwheel.yml**: Builds manylinux2014 Python wheels for multiple Python versions (3.9-3.12) using custom container with GCC devtoolset-11 **To replicate CI locally**: Use the exact cmake command from `build-linux.yml` configuration step. @@ -303,11 +310,11 @@ make -j$(nproc) 5. **Test early and often** - Build times are reasonable (~5-10 min), so test incrementally 6. **Tests are required** - New features and bugfixes must be accompanied by tests. For bugs, first reproduce the issue in a unit test, then fix it in the code 7. **Header-only library** - Most code is in `include/svs/`, changes don't require recompiling everything -8. **ISA dispatching** - Runtime dispatch means the same binary runs on different CPU architectures -9. **Type erasure** - Orchestrators use type-erasure to provide simple and consistent interfaces across different implementations -10. **Extensions system** - The `extensions/` directory provides `svs_invoke` overloads/specializations to hook into core SVS routines (similar to `std::invoke`) -11. **Test filters are your friend** - Use Catch2 tags to run subsets of tests during development -12. 
**Python bindings are specialized** - Changes to template parameters may require Python binding updates +8. **ISA dispatching** - Runtime dispatch means the same binary runs on different CPU architectures. The library detects available CPU features (SSE, AVX2, AVX512) at runtime and dispatches to optimized code paths +9. **Type erasure in orchestrators** - Orchestrators (e.g., `svs::Vamana`) use type-erasure to hide template complexity, providing simple consistent interfaces. The underlying templated index implementations remain in `include/svs/index/` +10. **Extensions system** - The `extensions/` directory uses customization point objects (`svs_invoke`) to hook into core SVS routines, similar to `std::invoke`. This allows specializing behavior for different data types (e.g., compressed vs. uncompressed vectors) without modifying core algorithms +11. **Test filters are your friend** - Use Catch2 tags (e.g., `[integration]`, `[vamana]`, `[core]`) to run subsets of tests during development +12. **Python bindings are specialized** - Python bindings pre-specialize templates for common vector dimensionalities. Changes to template parameters in C++ may require updating Python binding specializations 13. **Version is synchronized** - Keep version in sync across `CMakeLists.txt`, `setup.py`, and test files ## Additional Resources From a8853bb6c48f9bad235980d3993865651cab041a Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 23 Oct 2025 11:30:46 +0000 Subject: [PATCH 7/7] Add AI-generated content attribution policy Co-authored-by: ahuber21 <9201869+ahuber21@users.noreply.github.com> --- .github/copilot-instructions.md | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md index ddced18f..20bc4e4c 100644 --- a/.github/copilot-instructions.md +++ b/.github/copilot-instructions.md @@ -309,13 +309,14 @@ make -j$(nproc) 4. 
**Format before committing** - Run `./tools/clang-format.sh clang-format` to avoid CI failures. **IMPORTANT**: Only format files you modify; do not include formatting changes from other files in your PR 5. **Test early and often** - Build times are reasonable (~5-10 min), so test incrementally 6. **Tests are required** - New features and bugfixes must be accompanied by tests. For bugs, first reproduce the issue in a unit test, then fix it in the code -7. **Header-only library** - Most code is in `include/svs/`, changes don't require recompiling everything -8. **ISA dispatching** - Runtime dispatch means the same binary runs on different CPU architectures. The library detects available CPU features (SSE, AVX2, AVX512) at runtime and dispatches to optimized code paths -9. **Type erasure in orchestrators** - Orchestrators (e.g., `svs::Vamana`) use type-erasure to hide template complexity, providing simple consistent interfaces. The underlying templated index implementations remain in `include/svs/index/` -10. **Extensions system** - The `extensions/` directory uses customization point objects (`svs_invoke`) to hook into core SVS routines, similar to `std::invoke`. This allows specializing behavior for different data types (e.g., compressed vs. uncompressed vectors) without modifying core algorithms -11. **Test filters are your friend** - Use Catch2 tags (e.g., `[integration]`, `[vamana]`, `[core]`) to run subsets of tests during development -12. **Python bindings are specialized** - Python bindings pre-specialize templates for common vector dimensionalities. Changes to template parameters in C++ may require updating Python binding specializations -13. **Version is synchronized** - Keep version in sync across `CMakeLists.txt`, `setup.py`, and test files +7. 
**AI-generated content attribution** - If content was added on behalf of Intel employees, add this line to the file header: `These contents may have been developed with support from one or more Intel-operated generative artificial intelligence solutions.` +8. **Header-only library** - Most code is in `include/svs/`, changes don't require recompiling everything +9. **ISA dispatching** - Runtime dispatch means the same binary runs on different CPU architectures. The library detects available CPU features (SSE, AVX2, AVX512) at runtime and dispatches to optimized code paths +10. **Type erasure in orchestrators** - Orchestrators (e.g., `svs::Vamana`) use type-erasure to hide template complexity, providing simple consistent interfaces. The underlying templated index implementations remain in `include/svs/index/` +11. **Extensions system** - The `extensions/` directory uses customization point objects (`svs_invoke`) to hook into core SVS routines, similar to `std::invoke`. This allows specializing behavior for different data types (e.g., compressed vs. uncompressed vectors) without modifying core algorithms +12. **Test filters are your friend** - Use Catch2 tags (e.g., `[integration]`, `[vamana]`, `[core]`) to run subsets of tests during development +13. **Python bindings are specialized** - Python bindings pre-specialize templates for common vector dimensionalities. Changes to template parameters in C++ may require updating Python binding specializations +14. **Version is synchronized** - Keep version in sync across `CMakeLists.txt`, `setup.py`, and test files ## Additional Resources