Milvus 简介
- 图像、音视频搜索领域
- 文本搜索、推荐和交互式问答系统等文本搜索领域
- 新药搜索、基因筛选等生物医药领域
Google Colab 简介
使用 Google Colab 运行 Milvus
- 下载并编译源码
- 启动 Milvus 服务
- 安装 pymilvus
# Notebook shell escape: install the pymilvus client pinned to version 0.2.14.
! pip install pymilvus==0.2.14
- 连接服务端
# Create the client and point it at the Milvus server on _HOST:_PORT.
milvus = Milvus(host=_HOST, port=_PORT)
# Ask the server for its status (timeout=10) and keep the reply.
server_status = milvus.server_status(timeout=10)
- 创建集合 / 分区 / 索引
# Settings for the new collection: its name, vector dimension,
# index file size and metric type (MetricType.L2).
param = {
    'collection_name': collection_name,
    'dimension': _DIM,
    'index_file_size': _INDEX_FILE_SIZE,
    'metric_type': MetricType.L2,
}
# Create the collection described by `param`.
milvus.create_collection(param, timeout=10)
# Add a partition, identified by `partition_tag`, to that collection.
milvus.create_partition(
    collection_name=collection_name,
    partition_tag=partition_tag,
    timeout=10,
)
# Build an IVF_FLAT index on the collection using nlist=16384.
ivf_param = {'nlist': 16384}
milvus.create_index(
    collection_name=collection_name,
    index_type=IndexType.IVF_FLAT,
    params=ivf_param,
)
- 插入数据并落盘
# Add `vectors` to the collection, passing `ids` as their identifiers.
milvus.insert(
    collection_name=collection_name,
    records=vectors,
    ids=ids,
)
# Flush the vector data of the listed collection(s) to disk;
# here the list holds only this one collection.
milvus.flush(
    collection_name_array=[collection_name],
    timeout=None,
)
- 加载数据并检索
# Load the collection for caching before querying it.
milvus.load_collection(
    collection_name=collection_name,
    timeout=None,
)
# Search the collection with vectors[0] as the single query record,
# requesting top_k=10 results, no partition filter, and nprobe=16.
search_param = {"nprobe": 16}
milvus.search(
    collection_name=collection_name,
    query_records=[vectors[0]],
    partition_tags=None,
    top_k=10,
    params=search_param,
)
- 获取集合 / 索引信息
# Fetch the collection's information (timeout=10).
milvus.get_collection_info(
    collection_name=collection_name,
    timeout=10,
)
# Fetch the information of the index built on the collection (timeout=10).
milvus.get_index_info(
    collection_name=collection_name,
    timeout=10,
)
- 通过 ID 获取向量
# Look up the collection's segment names via get_collection_stats() rather
# than hard-coding one: a literal segment name is only valid on the
# deployment it was copied from.
stats_status, stats = milvus.get_collection_stats(collection_name=collection_name, timeout=10)
# NOTE(review): assumes stats is shaped like
# {'partitions': [{'segments': [{'name': ...}, ...]}, ...]} and that an
# empty partition reports None/[] for 'segments' — confirm against pymilvus 0.2.14.
segment_names = [
    segment['name']
    for partition in stats['partitions']
    for segment in (partition.get('segments') or [])
]
# List the ids stored in the first segment found.
# segment_names is empty (IndexError here) when no partition reports a segment.
milvus.list_id_in_segment(collection_name=collection_name, segment_name=segment_names[0], timeout=None)
# Return the raw vectors for the given ids; valid ids can be taken from the
# list_id_in_segment() result.
milvus.get_entity_by_id(collection_name=collection_name, ids=[0], timeout=None)
- 获取 / 设置参数
# Read the configuration value stored under parent key 'cache', child key 'cache_size'.
milvus.get_config(parent_key='cache', child_key='cache_size')
# Set that same configuration value to '5G'.
milvus.set_config(parent_key='cache', child_key='cache_size', value='5G')
- 删除索引 / 向量 / 分区 / 集合
# Drop the index of the collection.
milvus.drop_index(
    collection_name=collection_name,
    timeout=None,
)
# Delete vectors from the collection by vector ID;
# id_array is a list[int] of the ids to remove.
milvus.delete_entity_by_id(
    collection_name=collection_name,
    id_array=[0],
    timeout=None,
)
# Drop the partition identified by `partition_tag` from the collection.
milvus.drop_partition(
    collection_name=collection_name,
    partition_tag=partition_tag,
    timeout=None,
)
# Finally, drop the collection itself by name.
milvus.drop_collection(
    collection_name=collection_name,
    timeout=10,
)
写在最后
更多 Milvus 实战系列文章
欢迎加入 Milvus 社区
本文分享自微信公众号 - ZILLIZ(Zilliztech)。
如有侵权,请联系 support@oschina.cn 删除。
本文参与“OSC源创计划”,欢迎正在阅读的你也加入,一起分享。