-
Notifications
You must be signed in to change notification settings - Fork 498
Open
Description
After creating a vector index with the custom name vector_index, calling dataset.prewarm_index('vector_index') fails with the following error message:
RuntimeError: LanceError(Index): Column vector_index does not exist in the schema, /Users/runner/work/lance/lance/rust/lance/src/index.rs:1155:31
Here is my reproduction code:
import random
import string
import lance
import numpy as np
from lance.vector import vec_to_table
import pyarrow as pa
def create_table(nvec=1000, ndim=128, nans=0, nullify=False, dtype=np.float32):
    """Build a table of random vectors with ``price``, ``meta`` and ``id`` columns.

    Args:
        nvec: Number of rows drawn from a standard normal distribution.
        ndim: Dimensionality of each vector.
        nans: Number of extra all-NaN rows appended after the random rows.
        nullify: When true, every odd-positioned entry of the "vector"
            column is replaced with ``None``.
        dtype: NumPy dtype the vector matrix is cast to before conversion.

    Returns:
        The table produced by ``vec_to_table`` with the three extra columns
        appended (and the vector column nullified if requested).
    """
    total_rows = nvec + nans

    vectors = np.random.randn(nvec, ndim)
    if nans > 0:
        # Append rows made up entirely of NaN values.
        nan_rows = np.full((nans, ndim), np.nan)
        vectors = np.concatenate((vectors, nan_rows), axis=0)
    vectors = vectors.astype(dtype)

    prices = np.random.rand(total_rows) * 100

    def gen_str(n):
        # Random alphanumeric string of length n.
        return "".join(random.choices(string.ascii_letters + string.digits, k=n))

    metas = np.array([gen_str(100) for _ in range(total_rows)])

    table = vec_to_table(data=vectors)
    table = table.append_column("price", pa.array(prices))
    table = table.append_column("meta", pa.array(metas))
    table = table.append_column("id", pa.array(range(total_rows)))

    if not nullify:
        return table

    # NOTE(review): relies on vec_to_table naming its column "vector" -- confirm.
    col = table.schema.get_field_index("vector")
    vec_field = table.schema.field(col)
    rows = table[col].to_pylist()
    # Keep even positions, blank out odd ones.
    masked = [None if pos % 2 else row for pos, row in enumerate(rows)]
    return table.set_column(col, vec_field, pa.array(masked, vec_field.type))
if __name__ == "__main__":
    # Generate the sample table with default settings and write it to disk.
    tbl = create_table()
    tmp_path = "/tmp/test_dataset.lance"
    dataset = lance.write_dataset(
        tbl,
        tmp_path,
        data_storage_version="2.1",
    )

    # Index the "vector" column under the custom index name "vector_index".
    dataset = dataset.create_index(
        "vector",
        name="vector_index",
        index_type="IVF_PQ",
        num_partitions=4,
        num_sub_vectors=16,
    )

    # Prewarm by index name; this is the call reported to raise
    # "Column vector_index does not exist in the schema".
    dataset.prewarm_index("vector_index")
Metadata
Metadata
Assignees
Labels
No labels