基于deepseek的智能语音客服【第三讲】知识库封装
1.构建知识库结构
字段请根据自己的实际业务进行定义,这里给出的是一个简化的知识库结构。
// Simplified knowledge-base schema: a VarChar primary key, the raw text,
// its embedding vector and an optional source label.
Arrays.asList(
        // Primary key. Milvus does not allow the primary field to be nullable,
        // so the nullable flag must not be set here.
        FieldType.newBuilder().withName("title").withDataType(DataType.VarChar)
                .withPrimaryKey(true).withMaxLength(300).build(),
        // Body text that is returned to the caller on a hit.
        FieldType.newBuilder().withName("text").withDataType(DataType.VarChar)
                .withAutoID(false).withMaxLength(10000).build(),
        // Embedding of the text; presumably 384 has to equal the embedding
        // model's output size - TODO confirm against the model in use.
        FieldType.newBuilder().withName("vector").withDataType(DataType.FloatVector)
                .withAutoID(false).withDimension(384).build(),
        // Marked nullable because the insert example in section 3 of this
        // article supplies only title, text and vector.
        FieldType.newBuilder().withName("source").withDataType(DataType.VarChar)
                .withNullable(true).withMaxLength(200).build()
)
2.构建索引
/*
 * Index type guide:
 *   - small data sets: FLAT (exact search)
 *   - medium / large data sets: IVF_FLAT, IVF_SQ8 (quantized index, saves memory)
 *   - high-dimensional data: HNSW (efficient approximate search)
 */
// Number of cluster units; tune it to the data volume.
// NOTE(review): nlist looks like an IVF-family parameter and is probably
// ignored by FLAT - confirm before relying on it.
final int clusterUnits = 128;
final String extraParam = String.format("{\"nlist\":%d}", clusterUnits);

// Describe the index: FLAT on the target field, named "<field>_index".
CreateIndexParam indexRequest = CreateIndexParam.newBuilder()
        .withCollectionName(collectionName)
        .withFieldName(fieldName)
        .withIndexName(fieldName + "_index")
        .withIndexType(IndexType.FLAT)
        // Pick the metric that matches the embedding model (e.g. L2 or IP).
        .withMetricType(MetricType.L2)
        .withExtraParam(extraParam)
        .build();

// Create the index and fail loudly on any non-success status.
R<RpcStatus> indexResult = client.createIndex(indexRequest);
boolean indexCreated = indexResult.getStatus() == R.Status.Success.getCode();
if (!indexCreated) {
    throw new RuntimeException("索引创建失败: " + indexResult.getMessage());
}
System.out.println("索引创建成功!");
3.插入数据(插入数据之前要先做向量化处理;向量化需要通过 Python 调用模型来完成,下一篇补充)
// Column data keyed by field name; every column carries exactly one row.
Map<String, List<?>> columnsByName = new HashMap<>();
columnsByName.put("title", Collections.singletonList(title));
columnsByName.put("text", Collections.singletonList(des));
columnsByName.put("vector", Collections.singletonList(vector));

// Convert each named column into the SDK's field representation.
List<InsertParam.Field> sdkFields = new ArrayList<>();
for (String columnName : columnsByName.keySet()) {
    List<?> columnValues = columnsByName.get(columnName);
    sdkFields.add(new InsertParam.Field(columnName, columnValues));
}

// Send the row to the target collection.
InsertParam insertRequest = InsertParam.newBuilder()
        .withCollectionName(collection_name)
        .withFields(sdkFields)
        .build();
R<MutationResult> insertResult = client.insert(insertRequest);

// Any status other than success aborts with the server's message.
boolean inserted = insertResult.getStatus() == R.Status.Success.getCode();
if (!inserted) {
    throw new RuntimeException("数据插入失败:" + insertResult.getMessage());
}
System.out.println("数据插入成功!");
4.向量化检索与解析(检索之前同样需要向量化处理)
// Reject a missing or empty query vector before touching the server.
if (embedding == null || embedding.isEmpty()) {
    throw new IllegalArgumentException("Embedding cannot be null or empty");
}

// Load the collection so it can be searched.
client.loadCollection(LoadCollectionParam.newBuilder()
        .withCollectionName(collection_name)
        .build());

// Build the search request: a single query vector against the "vector" field.
SearchParam searchParam = SearchParam.newBuilder()
        .withCollectionName(collection_name)
        .withMetricType(MetricType.L2)
        .withTopK(topK)
        .withVectorFieldName("vector")
        .withOutFields(outFileds)
        .withFloatVectors(Collections.singletonList(embedding))
        .withParams("{\"nprobe\": 10}")
        .build();

// Run the search and fail on any non-success status.
R<SearchResults> response = client.search(searchParam);
if (response.getStatus() != R.Status.Success.getCode()) {
    throw new RuntimeException("Milvus 搜索失败: " + response.getMessage());
}

// Parse the results: one map of output-field values per hit of query 0.
SearchResultsWrapper wrapper = new SearchResultsWrapper(response.getData().getResults());
List<SearchResultsWrapper.IDScore> scores = wrapper.getIDScore(0);
List<Map<String, Object>> results = new ArrayList<>();
for (int row = 0; row < scores.size(); row++) {
    Map<String, Object> item = new HashMap<>();
    for (String field : outFileds) {
        // Compare by value: == on Strings tests reference identity, which
        // only matches when the caller's string happens to be interned.
        if ("vector".equals(field)) {
            continue;
        }
        // Field data is read by position, so row doubles as the hit index.
        item.put(field, wrapper.getFieldWrapper(field).getFieldData().get(row));
    }
    results.add(item);
}
5.其他检索方式
关键词检索
// Validate the arguments.
if (filter == null || filter.isEmpty()) {
    throw new IllegalArgumentException("Filter cannot be null or empty");
}
if (outFields == null || outFields.isEmpty()) {
    throw new IllegalArgumentException("Output fields cannot be null or empty");
}
if (collection_name == null || collection_name.isEmpty()) {
    throw new IllegalArgumentException("Collection name cannot be null or empty");
}

// Make sure the collection exists. The RPC status is checked first so a failed
// call is reported as such instead of surfacing as a NullPointerException when
// the Boolean payload is unboxed.
R<Boolean> existsResponse = client.hasCollection(HasCollectionParam.newBuilder()
        .withCollectionName(collection_name)
        .build());
if (existsResponse.getStatus() != R.Status.Success.getCode()) {
    throw new RuntimeException("Failed to check collection: " + existsResponse.getMessage());
}
boolean collectionExists = Boolean.TRUE.equals(existsResponse.getData());
if (!collectionExists) {
    // This is an existence check, not a load, so the message says so.
    throw new RuntimeException("Collection does not exist: " + collection_name);
}

// Load the collection so it can be queried.
client.loadCollection(LoadCollectionParam.newBuilder()
        .withCollectionName(collection_name)
        .build());

// Build the scalar (filter-expression) query.
QuerySimpleParam queryParam = QuerySimpleParam.newBuilder()
        .withCollectionName(collection_name)
        .withFilter(filter)
        .withOutputFields(outFields)
        .build();

// Run the query and fail on any non-success status.
R<QueryResponse> response = client.query(queryParam);
if (response.getStatus() != R.Status.Success.getCode()) {
    throw new RuntimeException("Milvus 查询失败: " + response.getMessage());
}

// Convert each returned row into a map of the requested output fields.
List<QueryResultsWrapper.RowRecord> results = response.getData().getRowRecords();
List<Map<String, Object>> formattedResults = new ArrayList<>();
for (QueryResultsWrapper.RowRecord result : results) {
    Map<String, Object> row = new HashMap<>();
    for (String field : outFields) {
        // Compare by value: == on Strings tests reference identity, which
        // only matches when the caller's string happens to be interned.
        if ("vector".equals(field)) {
            continue;
        }
        row.put(field, result.getFieldValues().get(field));
    }
    formattedResults.add(row);
}