Skip to content

Could not prewarm a vector index when using a custom index name #5411

@xloya

Description

@xloya

After creating a vector index with a custom name (vector_index), calling dataset.prewarm_index('vector_index') fails with the following error:

RuntimeError: LanceError(Index): Column vector_index does not exist in the schema, /Users/runner/work/lance/lance/rust/lance/src/index.rs:1155:31

This is my reproduction code:

import random
import string

import lance
import numpy as np
from lance.vector import vec_to_table
import pyarrow as pa


def create_table(nvec=1000, ndim=128, nans=0, nullify=False, dtype=np.float32):
    """Build a random test table of vectors plus price, meta and id columns.

    Args:
        nvec: Number of rows filled with normally distributed random values.
        ndim: Dimension of every vector.
        nans: Number of extra all-NaN rows appended after the random rows.
        nullify: When true, every odd-indexed entry of the "vector" column
            is replaced with a null.
        dtype: NumPy dtype the vector matrix is cast to before conversion.

    Returns:
        The table produced by ``vec_to_table`` with "price", "meta" and "id"
        columns appended; it has ``nvec + nans`` rows.
    """
    total_rows = nvec + nans

    vectors = np.random.randn(nvec, ndim)
    if nans > 0:
        # Pad the matrix with rows made entirely of NaN.
        nan_rows = np.full((nans, ndim), np.nan)
        vectors = np.concatenate((vectors, nan_rows), axis=0)
    vectors = vectors.astype(dtype)

    prices = np.random.rand(total_rows) * 100

    # One random 100-character alphanumeric string per row.
    alphabet = string.ascii_letters + string.digits
    meta = np.array(
        ["".join(random.choices(alphabet, k=100)) for _ in range(total_rows)]
    )

    table = vec_to_table(data=vectors)
    table = table.append_column("price", pa.array(prices))
    table = table.append_column("meta", pa.array(meta))
    table = table.append_column("id", pa.array(range(total_rows)))

    if not nullify:
        return table

    # Rebuild the "vector" column with a null in every odd position,
    # keeping the original field (name and type).
    column = table.schema.get_field_index("vector")
    vector_field = table.schema.field(column)
    sparse = [
        None if position % 2 else vec
        for position, vec in enumerate(table[column].to_pylist())
    ]
    return table.set_column(column, vector_field, pa.array(sparse, vector_field.type))


if __name__ == "__main__":
    dataset_uri = "/tmp/test_dataset.lance"
    index_name = "vector_index"

    # Write a freshly generated table out as a Lance dataset.
    dataset = lance.write_dataset(
        create_table(), dataset_uri, data_storage_version="2.1"
    )

    # Build an IVF_PQ index on the "vector" column under a custom name.
    dataset = dataset.create_index(
        "vector",
        name=index_name,
        index_type="IVF_PQ",
        num_partitions=4,
        num_sub_vectors=16,
    )

    # Prewarm the index by its custom name; this is the call that raises
    # the reported "Column vector_index does not exist" error.
    dataset.prewarm_index(index_name)

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions