llama-index==0.10.19
Occasionally you may want to use multiple indices for querying, such as when building a custom chatbot that talks about selected topics, and you would like to avoid recalculating embeddings.
Note that for large datasets, it is more efficient to filter indexed nodes by metadata keywords than to merge indices like this.
# Rebuild a single queryable index from several persisted indices,
# reusing the stored embeddings so they are not recomputed.
# NOTE(review): assumes each persist_dir holds a single VectorStoreIndex
# whose SimpleVectorStore serializes an 'embedding_dict' keyed by node id
# — confirm against the llama-index version in use (pinned 0.10.19).
nodes = []
for path in ['doc1_persist', 'doc2_persist', 'doc3_persist']:
    # Read the stored index back from disk.
    storage_context = StorageContext.from_defaults(persist_dir=path)
    index = load_index_from_storage(storage_context)

    # The persisted vector store keeps node-id -> embedding mappings.
    vector_store_dict = index.storage_context.vector_store.to_dict()
    embedding_dict = vector_store_dict['embedding_dict']

    for doc_id, node in index.storage_context.docstore.docs.items():
        # Attach the persisted embedding to the node; VectorStoreIndex
        # only embeds nodes whose .embedding is None, so this is what
        # avoids re-calculating embeddings below.
        node.embedding = embedding_dict[doc_id]
        nodes.append(node)

# Build one merged index over all collected (pre-embedded) nodes.
merged_index = VectorStoreIndex(nodes=nodes)
# Now make queries using the merged index.