ES练习册
es索引结构和数据实例
这里提供索引结构和数据实例提供给大家使用练习,希望大家能够一起成长进步~~~~
#添加索引
PUT /ecommerce_products
{"settings": {"number_of_shards": 3,"number_of_replicas": 1,"analysis": {"analyzer": {"custom_lowercase": {"type": "custom","tokenizer": "standard","filter": ["lowercase"]}}}},"mappings": {"dynamic": "strict","properties": {"product_id": { "type": "keyword" }, "name": {"type": "text","fields": {"keyword": { "type": "keyword", "ignore_above": 256 }},"analyzer": "custom_lowercase"},"description": { "type": "text" },"price": { "type": "scaled_float", "scaling_factor": 100 },"in_stock": { "type": "boolean" },"categories": { "type": "keyword" },"tags": {"type": "keyword","eager_global_ordinals": true},"created_at": { "type": "date","format": "strict_date_optional_time||epoch_millis"},"last_updated": { "type": "date" },"rating": { "type": "half_float" },"reviews": {"type": "nested","properties": {"user_id": { "type": "keyword" },"score": { "type": "byte" },"comment": { "type": "text" },"created_at": { "type": "date" },"responses": {"type": "nested","properties": {"admin_id": { "type": "keyword" },"response_text": { "type": "text" }}}}},"warehouse_location": { "type": "geo_point" },"supplier_info": {"properties": {"name": { "type": "keyword" },"ip_address": { "type": "ip" }}},"suggest": { "type": "completion" },"attributes": {"type": "object","dynamic": true},"name_translations": {"type": "object","properties": {"en": { "type": "text" },"zh": { "type": "text", "analyzer": "ik_max_word" },"es": { "type": "text" }}}}}
}#查询索引
GET /ecommerce_products#添加文档数据
#POST/索引库名/_doc/文档idPOST /ecommerce_products/_doc/1
{"product_id": "P123456","name": "无线蓝牙耳机","description": "高端降噪耳机,30小时超长续航","price": 199.99,"in_stock": true,"categories": ["电子产品", "音频设备"],"tags": ["无线", "降噪", "新款"],"created_at": "2023-10-05T08:23:45Z","last_updated": "2024-02-15","rating": 4.5,"reviews": [{"user_id": "U1001","score": 5,"comment": "音质非常出色","created_at": "2023-12-01","responses": [{"admin_id": "ADM001","response_text": "感谢您的反馈!"}]}],"warehouse_location": {"lat": 40.7128,"lon": -74.0060},"supplier_info": {"name": "音科技术公司","ip_address": "192.168.1.100"},"suggest": {"input": ["耳机", "蓝牙", "无线"],"weight": 34},"attributes": {"颜色": "黑色","重量_kg": 0.25},"name_translations": {"en": "Wireless Headphones","zh": "无线蓝牙耳机","es": "Audífonos inalámbricos Bluetooth"}
}POST /ecommerce_products/_doc/2
{"product_id": "P789012","name": "智能健身手环","description": "防水运动手环,支持心率监测和睡眠追踪","price": 79.99,"in_stock": false,"categories": ["可穿戴设备", "运动健康"],"tags": ["防水", "心率监测", "入门款"],"created_at": "2023-11-20","last_updated": "2024-03-10T14:30:00","rating": 4.2,"reviews": [{"user_id": "U2002","score": 4,"comment": "性价比不错,但APP需要改进","created_at": "2024-01-15","responses": [{"admin_id": "ADM002","response_text": "我们会持续优化软件体验"}]}],"warehouse_location": "31.2304,121.4737","supplier_info": {"name": "智动科技","ip_address": "10.20.30.40"},"suggest": {"input": ["手环", "运动", "健康"],"weight": 28},"attributes": {"腕带材质": "硅胶","屏幕类型": "OLED"}
}POST /ecommerce_products/_doc/3
{"product_id": "P345678","name": "智能温控水杯","description": "支持APP控制的保温水杯,12小时长效保温","price": 149.5,"in_stock": true,"categories": ["生活家电", "智能硬件"],"tags": ["保温", "智能控制", "礼品"],"created_at": "2024-02-01T09:15:30Z","rating": 4.8,"reviews": [{"user_id": "U3003","score": 5,"comment": "冬天保持水温效果非常好","created_at": "2024-03-01","responses": []}],"warehouse_location": "22.3193,114.1694","supplier_info": {"name": "智联家居","ip_address": "172.16.0.100"},"suggest": {"input": ["水杯", "保温杯", "智能"],"weight": 42},"attributes": {"容量_ml": 500,"材质": "不锈钢"},"name_translations": {"en": "Smart Thermos Cup","zh": "智能温控水杯","es": "Taza termo inteligente"}
}POST /ecommerce_products/_doc/4
{"product_id": "P456789","name": "健康监测智能手表","description": "1.5英寸AMOLED屏,支持血氧心率监测,50米防水","price": 899.0,"in_stock": true,"categories": ["可穿戴设备", "健康监测"],"tags": ["防水", "长续航", "新品"],"created_at": "2024-03-20T10:00:00Z","rating": 4.7,"reviews": [{"user_id": "U4004","score": 5,"comment": "游泳监测非常准确","created_at": "2024-04-05","responses": [{"admin_id": "ADM003","response_text": "感谢选择我们的产品!"}]},{"user_id": "U3003","score": 5,"comment": "喜欢","created_at": "2024-04-05","responses": [{"admin_id": "ADM003","response_text": "感谢选择我们的产品!"}]}],"warehouse_location": "22.2833,114.1667", "supplier_info": {"name": "健康科技集团","ip_address": "10.88.10.25"},"attributes": {"颜色": "曜石黑","表带材质": "氟橡胶","电池容量_mAh": 450},"name_translations": {"en": "Health Monitoring Smart Watch","zh": "健康监测智能手表","es": "Taza termo inteligente"}
}POST /ecommerce_products/_doc/5
{"product_id": "P567890","name": "静音空气净化器","description": "CADR 600m³/h,智能感应PM2.5,适用80㎡空间","price": 2499.0,"in_stock": true,"categories": ["生活家电", "环境电器"],"tags": ["智能感应", "静音"],"created_at": "2023-12-15","rating": 4.3,"reviews": [{"user_id": "U3003","score": 4,"comment": "睡眠模式确实安静","created_at": "2024-01-10","responses": []}],"warehouse_location": { "lat": 30.5728,"lon": 104.0668}, "supplier_info": {"name": "清洁科技公司","ip_address": "172.20.10.5"},"attributes": {"滤芯类型": "复合滤网","噪音分贝": 28,"适用面积_㎡": 80},"name_translations": {"en": "Health Monitoring Smart Watch","zh": "空气净化器","es": "Taza termo inteligente"}
}POST /ecommerce_products/_doc/6
{"product_id": "P678901","name": "全掌气垫跑步鞋","description": "轻量透气网面,ZOOM AIR缓震技术","price": 699.0,"in_stock": false,"categories": ["运动服饰", "鞋类"],"tags": ["促销", "限量款"],"created_at": "2024-02-28T14:30:00Z","rating": 4.6,"reviews": [{"user_id": "U6006","score": 2,"comment": "尺码偏小建议买大一码","created_at": "2024-03-15","responses": [{"admin_id": "ADM002","response_text": "已反馈给质检部门改进"}]}],"warehouse_location": "23.1291,113.2644", "supplier_info": {"name": "运动装备制造","ip_address": "192.168.2.200"},"attributes": {"颜色": "荧光绿/黑","尺码": "42","重量_g": 320},"name_translations": {"en": "Health Monitoring Smart Watch","zh": "全掌气垫跑步鞋","es": "Taza termo inteligente"}
}POST /ecommerce_products/_doc/8
{"product_id": "P890123","name": "4K全景云台摄像头","description": "360°全景追踪,红外夜视,支持AI人形检测","price": 399.0,"in_stock": true,"categories": ["智能硬件", "安防"],"tags": ["夜视", "AI识别"],"created_at": "2024-01-10T08:00:00Z","rating": 4.0,"reviews": [{"user_id": "U8008","score": 2,"comment": "夜间画质有待提升","created_at": "2024-02-01","responses": []}],"warehouse_location": "39.9042,116.4074", "supplier_info": {"name": "智能安防科技","ip_address": "192.168.5.100"},"attributes": {"原价": "599","存储方式": ["云存储", "本地SD卡"],"夜视距离_米": 15},"name_translations": {"en": "Health Monitoring Smart Watch","zh": "4K全景云台摄像头","es": "Taza termo inteligente"}
}#查询文档
#GET/索引库名/_doc/文档id
GET /ecommerce_products/_doc/1#增量修改文档
#POST/索引库名/_update/文档id
POST /ecommerce_products/_update/8
{"doc": {"tags": ["夜视", "AI识别","促销"]}
}
相关性算分专线练习
一、function_score
的核心作用
function_score
允许您通过自定义规则修改文档的原始相关性评分 (_score
),实现以下目标:
-
提升特定文档的排名(如促销商品、高库存商品)
-
降低无关文档的排名(如过期内容)
-
完全自定义排序逻辑(如结合价格、评分、时间等多因素)
二、基础语法结构
GET /索引名/_search
{"query": {"function_score": {"query": { ... }, // 主查询(基础结果集)"functions": [ ... ], // 评分函数列表"score_mode": "sum", // 函数评分的组合方式"boost_mode": "sum", // 最终评分与原始评分的组合方式"max_boost": 10.0, // 函数评分的最大限制"min_score": 2.0 // 结果过滤阈值(评分低于此值的文档被排除)}},"sort": [ { "_score": "desc" } ]
}
三、核心参数详解
1. query
(主查询)
-
作用:定义基础查询,决定哪些文档会被处理。
-
示例:匹配所有文档或特定条件。
"query": { "match_all": {} }
2. functions
(评分函数)
支持多种函数类型,可同时使用多个函数:
函数类型 | 作用 | 示例 |
---|---|---|
weight | 固定权重值 | { "weight": 2.0, "filter": { "term": { ... } } } |
field_value_factor | 基于字段值的评分计算 | { "field": "rating", "factor": 1.5 } |
random_score | 随机评分(需指定种子) | { "random_score": { "seed": 用户ID } } |
script_score | 自定义脚本计算评分 | { "script": "doc['price'].value * 0.1" } |
decay | 按距离/时间衰减评分(如高斯衰减) | 见下文详细示例 |
3. score_mode
(函数评分组合方式)
模式 | 说明 |
---|---|
sum | 所有函数评分相加(默认) |
avg | 取平均值 |
max | 取最大值 |
min | 取最小值 |
multiply | 所有函数评分相乘 |
4. boost_mode
(最终评分计算方式)
模式 | 公式 |
---|---|
sum | 最终分 = 原始分 + 函数评分 |
replace | 最终分 = 函数评分 |
multiply | 最终分 = 原始分 × 函数评分 |
avg | 最终分 = (原始分 + 函数评分)/2 |
四、实战练习
#需求:
#搜索所有商品,但为有库存(in_stock=true)的商品评分增加 50% 权重,同时为评分≥4 的商品额外增加 30% 权重。最终评分按加权值排序。
GET /ecommerce_products/_search
{"query": {"function_score": {"query": {"match_all": {}},"functions": [{"filter": {"term": {"in_stock": "true"}},"weight": 1.5},{"filter": {"range": {"rating": {"gte": 4.2}}},"weight": 1.3}],"score_mode": "sum","boost_mode": "replace"}},"sort": [ { "_score": "desc" } ]
}
Aggs聚合
大白话的理解就是:
你让他aggs一下type,他就把不同类型type以及数量返回给你
一、聚合的核心概念
聚合(Aggregations) 是 Elasticsearch 中用于对数据进行统计分析的功能,类似于 SQL 的 GROUP BY
+ 统计函数(如 COUNT
、AVG
)。
核心用途:
-
数据分组统计(如按分类统计商品数量)
-
计算指标(如平均价格、最高评分)
-
多维度交叉分析(如时间+地域的销售分布)
二、基础语法
{"aggs": {"agg_name": {"agg_type": {"field": "field_name","size": 10 (可选参数,具体取决于聚合类型)}}}
}
- agg_name: 自定义的聚合名称,用于标识聚合结果。
- agg_type: 聚合的类型,例如
terms
、avg
、sum
等。 - field_name: 指定要进行聚合操作的字段。
三、常见聚合类型
Terms Aggregation (按值分组)
- 用于根据字段的唯一值对文档进行分组。
- 适用于分析离散值,如标签、类别等。
{"aggs": {"group_by_status": {"terms": {"field": "status.keyword"}}}
}
Range Aggregation (范围分组)
- 用于根据数值区间对文档进行分组。
{"aggs": {"price_ranges": {"range": {"field": "price","ranges": [{ "to": 100 },{ "from": 100, "to": 200 },{ "from": 200 }]}}}
}
Date Histogram Aggregation (日期直方图)
- 按时间间隔对文档进行分组。
{"aggs": {"sales_over_time": {"date_histogram": {"field": "sale_date","calendar_interval": "month"}}}
}
Avg, Sum, Min, Max Aggregations (统计计算)
- 用于计算字段的平均值、总和、最小值、最大值。
{"aggs": {"average_price": {"avg": {"field": "price"}}}
}
Nested Aggregation (嵌套聚合)
- 在嵌套字段上进行聚合。
{"aggs": {"nested_comments": {"nested": {"path": "comments"},"aggs": {"comment_authors": {"terms": {"field": "comments.author"}}}}}
}
组合聚合
- 可以将多个聚合嵌套在一起,以得到更复杂的查询。例如,首先对一个字段进行分组,然后对每个分组进行统计计算。
{"aggs": {"group_by_status": {"terms": {"field": "status.keyword"},"aggs": {"average_price": {"avg": {"field": "price"}}}}}
}
四、实战练习
#需求:
#统计每个商品的评论回复率(有回复的评论数 / 总评论数),按回复率降序排列
GET /ecommerce_products/_search
{"size": 0,"aggs": {"products": {"nested": { "path": "reviews" },"aggs": {"total_comments": { "value_count": { "field": "reviews.user_id" } },"replied_comments": {"nested": { "path": "reviews.responses" },"aggs": { "response_count": { "value_count": { "field": "reviews.responses.admin_id" } } }}}}}
}
五、字段详解
bucket_script
作用:对于已有的聚合结果进行二次计算,应用于计算比例、差值等
基础语法:
{"aggs": {"自定义聚合名称": {"bucket_script": {"buckets_path": {"变量1": "聚合路径1","变量2": "聚合路径2"},"script": "params.变量1 + params.变量2"}}}
}
脚本
doc['field']的通用规则
适用字段类型
-
适用字段类型:仅适用于非文本字段(如
integer
、long
、double
、date
、keyword
等)。 -
不支持文本字段:如果字段是
text
类型且未设置fielddata=true
,直接访问doc['text_field']
会报错。 -
返回值类型:返回一个
FieldValues
对象,需通过特定方法获取值。
常用方法
(1) .value
-
作用:获取字段的第一个值(单值字段直接取值,多值字段取第一个值)。
-
示例:
// 单值字段:直接返回数值
double score = doc['score'].value;// 多值字段:返回第一个值(如 [80, 90] 返回 80)
int firstScore = doc['scores'].value;
(2) .values
-
作用:获取字段的所有值(返回数组形式)。
-
示例:
// 多值字段:返回数组 [80, 90]
double[] scores = doc['scores'].values;// 遍历多值字段
for (double s : doc['scores'].values) {total += s;
}
(3) .size()
-
作用:获取字段值的数量(单值字段返回
1
,多值字段返回实际数量)。 -
示例:
if (doc['score'].size() > 0) { // 判断字段是否存在且非空return doc['score'].value;
} else {return 0;
}
(4) .empty
-
作用:检查字段是否为空(无值)。
-
示例:
if (!doc['score'].empty) {return doc['score'].value;
}
(5) .get(int index)
-
作用:获取数组字段的指定索引值。
-
示例:
// 获取多值字段的第二个值
if (doc['scores'].size() >= 2) {double secondScore = doc['scores'].get(1);
}
(6) .count()
-
作用:统计字段中非空值的数量(与
.size()
不同,可能过滤空值)。 -
示例:
int validScores = doc['scores'].count();
(7) 聚合方法
-
适用场景:对多值字段进行聚合计算。
-
常用方法:
.min() // 最小值
.max() // 最大值
.sum() // 总和
.average() // 平均值
方法 | 用途 | 示例场景 |
---|---|---|
.value | 获取单值字段值或多值字段的第一个值 | 单值数值计算 |
.values | 获取多值字段的所有值 | 遍历求和、求平均 |
.size() | 获取值的数量(包括空值) | 判断字段是否为空 |
.empty | 检查字段是否为空 | 空值保护逻辑 |
.get(index) | 获取多值字段的指定索引值 | 访问特定位置的元素 |
.min()/.max() | 多值字段的最小值/最大值 | 统计范围筛选 |
补充
1. size()
、.count()
和 .empty
方法
以下是针对 Elasticsearch Painless 脚本中
doc['field']
的.size()
、.count()
和.empty
方法在处理 空值、数组、字段不存在 时的具体行为示例。
示例数据
假设 Elasticsearch 中有如下文档:
{"id": 1,"name": "Alice","scores": [85, 90, null], // 多值字段,包含 null"empty_array": [], // 空数组"nullable_field": null // 单值字段,值为 null
}
-
注意:字段
missing_field
不存在。
Painless 脚本示例
// 检查 scores 字段
def scores_size = doc['scores'].size(); // 返回 3
def scores_count = doc['scores'].count(); // 返回 2(过滤 null)
def scores_empty = doc['scores'].empty; // 返回 false// 检查 empty_array 字段
def empty_array_size = doc['empty_array'].size(); // 返回 0
def empty_array_count = doc['empty_array'].count();// 返回 0
def empty_array_empty = doc['empty_array'].empty; // 返回 true// 检查 nullable_field 字段
def nullable_size = doc['nullable_field'].size(); // 返回 1(单值 null)
def nullable_count = doc['nullable_field'].count();// 返回 0(过滤 null)
def nullable_empty = doc['nullable_field'].empty; // 返回 false(字段存在)// 检查 missing_field 字段
def missing_size = doc['missing_field'].size(); // 抛出异常(字段不存在)
def missing_empty = doc['missing_field'].empty; // 抛出异常(字段不存在)
字段类型 | .size() | .count() | .empty |
---|---|---|---|
多值带 null (如 scores: [85, 90, null] ) | 返回 总数量(含 null) 示例: 3 | 返回 非 null 值数量 示例: 2 | false (存在且非空数组) |
空数组 (如 empty_array: [] ) | 0 | 0 | true (存在但无值) |
单值 null (如 nullable_field: null ) | 1 (单值字段视为数组 [null]) | 0 (过滤 null) | false (字段存在) |
字段不存在 (如 missing_field ) | 报错 | 报错 | 报错 |
2.判空处理
// 正确做法:先检查字段是否存在
if (doc.containsKey('score') && !doc['score'].empty) {return doc['score'].value;
} else {return 0;
}