99#include " math_utils.h"
1010#include " index.h"
1111#include " partition.h"
12+ #include " program_options_utils.hpp"
1213
1314namespace po = boost::program_options;
1415
@@ -21,61 +22,65 @@ int main(int argc, char **argv)
2122 bool append_reorder_data = false ;
2223 bool use_opq = false ;
2324
24- po::options_description desc{" Arguments" };
25+ po::options_description desc{
26+ program_options_utils::make_program_description (" build_disk_index" , " Build a disk-based index." )};
2527 try
2628 {
2729 desc.add_options ()(" help,h" , " Print information on arguments" );
28- desc.add_options ()(" data_type" , po::value<std::string>(&data_type)->required (), " data type <int8/uint8/float>" );
29- desc.add_options ()(" dist_fn" , po::value<std::string>(&dist_fn)->required (), " distance function <l2/mips>" );
30- desc.add_options ()(" data_path" , po::value<std::string>(&data_path)->required (),
31- " Input data file in bin format" );
32- desc.add_options ()(" index_path_prefix" , po::value<std::string>(&index_path_prefix)->required (),
33- " Path prefix for saving index file components" );
34- desc.add_options ()(" max_degree,R" , po::value<uint32_t >(&R)->default_value (64 ), " Maximum graph degree" );
35- desc.add_options ()(" Lbuild,L" , po::value<uint32_t >(&L)->default_value (100 ),
36- " Build complexity, higher value results in better graphs" );
37- desc.add_options ()(" search_DRAM_budget,B" , po::value<float >(&B)->required (),
38- " DRAM budget in GB for searching the index to set the "
39- " compressed level for data while search happens" );
40- desc.add_options ()(" build_DRAM_budget,M" , po::value<float >(&M)->required (),
41- " DRAM budget in GB for building the index" );
42- desc.add_options ()(" num_threads,T" , po::value<uint32_t >(&num_threads)->default_value (omp_get_num_procs ()),
43- " Number of threads used for building index (defaults to "
44- " omp_get_num_procs())" );
45- desc.add_options ()(" QD" , po::value<uint32_t >(&QD)->default_value (0 ), " Quantized Dimension for compression" );
46- desc.add_options ()(" codebook_prefix" , po::value<std::string>(&codebook_prefix)->default_value (" " ),
47- " Path prefix for pre-trained codebook" );
48- desc.add_options ()(" PQ_disk_bytes" , po::value<uint32_t >(&disk_PQ)->default_value (0 ),
49- " Number of bytes to which vectors should be compressed "
50- " on SSD; 0 for no compression" );
51- desc.add_options ()(" append_reorder_data" , po::bool_switch ()->default_value (false ),
52- " Include full precision data in the index. Use only in "
53- " conjuction with compressed data on SSD." );
54- desc.add_options ()(" build_PQ_bytes" , po::value<uint32_t >(&build_PQ)->default_value (0 ),
55- " Number of PQ bytes to build the index; 0 for full "
56- " precision build" );
57- desc.add_options ()(" use_opq" , po::bool_switch ()->default_value (false ),
58- " Use Optimized Product Quantization (OPQ)." );
59- desc.add_options ()(" label_file" , po::value<std::string>(&label_file)->default_value (" " ),
60- " Input label file in txt format for Filtered Index build ."
61- " The file should contain comma separated filters for each node "
62- " with each line corresponding to a graph node" );
63- desc.add_options ()(" universal_label" , po::value<std::string>(&universal_label)->default_value (" " ),
64- " Universal label, Use only in conjuction with label file for "
65- " filtered "
66- " index build. If a graph node has all the labels against it, we "
67- " can "
68- " assign a special universal filter to the point instead of comma "
69- " separated filters for that point" );
70- desc.add_options ()(" FilteredLbuild" , po::value<uint32_t >(&Lf)->default_value (0 ),
71- " Build complexity for filtered points, higher value "
72- " results in better graphs" );
73- desc.add_options ()(" filter_threshold,F" , po::value<uint32_t >(&filter_threshold)->default_value (0 ),
74- " Threshold to break up the existing nodes to generate new graph "
75- " internally where each node has a maximum F labels." );
76- desc.add_options ()(" label_type" , po::value<std::string>(&label_type)->default_value (" uint" ),
77- " Storage type of Labels <uint/ushort>, default value is uint which "
78- " will consume memory 4 bytes per filter" );
30+
31+ // Required parameters
32+ po::options_description required_configs (" Required" );
33+ required_configs.add_options ()(" data_type" , po::value<std::string>(&data_type)->required (),
34+ program_options_utils::DATA_TYPE_DESCRIPTION);
35+ required_configs.add_options ()(" dist_fn" , po::value<std::string>(&dist_fn)->required (),
36+ program_options_utils::DISTANCE_FUNCTION_DESCRIPTION);
37+ required_configs.add_options ()(" index_path_prefix" , po::value<std::string>(&index_path_prefix)->required (),
38+ program_options_utils::INDEX_PATH_PREFIX_DESCRIPTION);
39+ required_configs.add_options ()(" data_path" , po::value<std::string>(&data_path)->required (),
40+ program_options_utils::INPUT_DATA_PATH);
41+ required_configs.add_options ()(" search_DRAM_budget,B" , po::value<float >(&B)->required (),
42+ " DRAM budget in GB for searching the index to set the "
43+ " compressed level for data while search happens" );
44+ required_configs.add_options ()(" build_DRAM_budget,M" , po::value<float >(&M)->required (),
45+ " DRAM budget in GB for building the index" );
46+
47+ // Optional parameters
48+ po::options_description optional_configs (" Optional" );
49+ optional_configs.add_options ()(" num_threads,T" ,
50+ po::value<uint32_t >(&num_threads)->default_value (omp_get_num_procs ()),
51+ program_options_utils::NUMBER_THREADS_DESCRIPTION);
52+ optional_configs.add_options ()(" max_degree,R" , po::value<uint32_t >(&R)->default_value (64 ),
53+ program_options_utils::MAX_BUILD_DEGREE);
54+ optional_configs.add_options ()(" Lbuild,L" , po::value<uint32_t >(&L)->default_value (100 ),
55+ program_options_utils::GRAPH_BUILD_COMPLEXITY);
56+ optional_configs.add_options ()(" QD" , po::value<uint32_t >(&QD)->default_value (0 ),
57+ " Quantized Dimension for compression" );
58+ optional_configs.add_options ()(" codebook_prefix" , po::value<std::string>(&codebook_prefix)->default_value (" " ),
59+ " Path prefix for pre-trained codebook" );
60+ optional_configs.add_options ()(" PQ_disk_bytes" , po::value<uint32_t >(&disk_PQ)->default_value (0 ),
61+ " Number of bytes to which vectors should be compressed "
62+ " on SSD; 0 for no compression" );
63+ optional_configs.add_options ()(" append_reorder_data" , po::bool_switch ()->default_value (false ),
64+ " Include full precision data in the index. Use only in "
65+ " conjuction with compressed data on SSD." );
66+ optional_configs.add_options ()(" build_PQ_bytes" , po::value<uint32_t >(&build_PQ)->default_value (0 ),
67+ program_options_utils::BUIlD_GRAPH_PQ_BYTES);
68+ optional_configs.add_options ()(" use_opq" , po::bool_switch ()->default_value (false ),
69+ program_options_utils::USE_OPQ);
70+ optional_configs.add_options ()(" label_file" , po::value<std::string>(&label_file)->default_value (" " ),
71+ program_options_utils::LABEL_FILE);
72+ optional_configs.add_options ()(" universal_label" , po::value<std::string>(&universal_label)->default_value (" " ),
73+ program_options_utils::UNIVERSAL_LABEL);
74+ optional_configs.add_options ()(" FilteredLbuild" , po::value<uint32_t >(&Lf)->default_value (0 ),
75+ program_options_utils::FILTERED_LBUILD);
76+ optional_configs.add_options ()(" filter_threshold,F" , po::value<uint32_t >(&filter_threshold)->default_value (0 ),
77+ " Threshold to break up the existing nodes to generate new graph "
78+ " internally where each node has a maximum F labels." );
79+ optional_configs.add_options ()(" label_type" , po::value<std::string>(&label_type)->default_value (" uint" ),
80+ program_options_utils::LABEL_TYPE_DESCRIPTION);
81+
82+ // Merge required and optional parameters
83+ desc.add (required_configs).add (optional_configs);
7984
8085 po::variables_map vm;
8186 po::store (po::parse_command_line (argc, argv, desc), vm);
0 commit comments