当前位置: 首页 > news >正文

ES练习册

es索引结构和数据实例

这里提供索引结构和示例数据,供大家练习使用,希望大家能够一起成长进步~

#添加索引
PUT /ecommerce_products
{"settings": {"number_of_shards": 3,"number_of_replicas": 1,"analysis": {"analyzer": {"custom_lowercase": {"type": "custom","tokenizer": "standard","filter": ["lowercase"]}}}},"mappings": {"dynamic": "strict","properties": {"product_id": { "type": "keyword" }, "name": {"type": "text","fields": {"keyword": { "type": "keyword", "ignore_above": 256 }},"analyzer": "custom_lowercase"},"description": { "type": "text" },"price": { "type": "scaled_float", "scaling_factor": 100 },"in_stock": { "type": "boolean" },"categories": { "type": "keyword" },"tags": {"type": "keyword","eager_global_ordinals": true},"created_at": { "type": "date","format": "strict_date_optional_time||epoch_millis"},"last_updated": { "type": "date" },"rating": { "type": "half_float" },"reviews": {"type": "nested","properties": {"user_id": { "type": "keyword" },"score": { "type": "byte" },"comment": { "type": "text" },"created_at": { "type": "date" },"responses": {"type": "nested","properties": {"admin_id": { "type": "keyword" },"response_text": { "type": "text" }}}}},"warehouse_location": { "type": "geo_point" },"supplier_info": {"properties": {"name": { "type": "keyword" },"ip_address": { "type": "ip" }}},"suggest": { "type": "completion" },"attributes": {"type": "object","dynamic": true},"name_translations": {"type": "object","properties": {"en": { "type": "text" },"zh": { "type": "text", "analyzer": "ik_max_word" },"es": { "type": "text" }}}}}
}

#查询索引
GET /ecommerce_products

#添加文档数据
#POST/索引库名/_doc/文档id
POST /ecommerce_products/_doc/1
{"product_id": "P123456","name": "无线蓝牙耳机","description": "高端降噪耳机,30小时超长续航","price": 199.99,"in_stock": true,"categories": ["电子产品", "音频设备"],"tags": ["无线", "降噪", "新款"],"created_at": "2023-10-05T08:23:45Z","last_updated": "2024-02-15","rating": 4.5,"reviews": [{"user_id": "U1001","score": 5,"comment": "音质非常出色","created_at": "2023-12-01","responses": [{"admin_id": "ADM001","response_text": "感谢您的反馈!"}]}],"warehouse_location": {"lat": 40.7128,"lon": -74.0060},"supplier_info": {"name": "音科技术公司","ip_address": "192.168.1.100"},"suggest": {"input": ["耳机", "蓝牙", "无线"],"weight": 34},"attributes": {"颜色": "黑色","重量_kg": 0.25},"name_translations": {"en": "Wireless Headphones","zh": "无线蓝牙耳机","es": "Audífonos inalámbricos Bluetooth"}
}

POST /ecommerce_products/_doc/2
{"product_id": "P789012","name": "智能健身手环","description": "防水运动手环,支持心率监测和睡眠追踪","price": 79.99,"in_stock": false,"categories": ["可穿戴设备", "运动健康"],"tags": ["防水", "心率监测", "入门款"],"created_at": "2023-11-20","last_updated": "2024-03-10T14:30:00","rating": 4.2,"reviews": [{"user_id": "U2002","score": 4,"comment": "性价比不错,但APP需要改进","created_at": "2024-01-15","responses": [{"admin_id": "ADM002","response_text": "我们会持续优化软件体验"}]}],"warehouse_location": "31.2304,121.4737","supplier_info": {"name": "智动科技","ip_address": "10.20.30.40"},"suggest": {"input": ["手环", "运动", "健康"],"weight": 28},"attributes": {"腕带材质": "硅胶","屏幕类型": "OLED"}
}

POST /ecommerce_products/_doc/3
{"product_id": "P345678","name": "智能温控水杯","description": "支持APP控制的保温水杯,12小时长效保温","price": 149.5,"in_stock": true,"categories": ["生活家电", "智能硬件"],"tags": ["保温", "智能控制", "礼品"],"created_at": "2024-02-01T09:15:30Z","rating": 4.8,"reviews": [{"user_id": "U3003","score": 5,"comment": "冬天保持水温效果非常好","created_at": "2024-03-01","responses": []}],"warehouse_location": "22.3193,114.1694","supplier_info": {"name": "智联家居","ip_address": "172.16.0.100"},"suggest": {"input": ["水杯", "保温杯", "智能"],"weight": 42},"attributes": {"容量_ml": 500,"材质": "不锈钢"},"name_translations": {"en": "Smart Thermos Cup","zh": "智能温控水杯","es": "Taza termo inteligente"}
}

POST /ecommerce_products/_doc/4
{"product_id": "P456789","name": "健康监测智能手表","description": "1.5英寸AMOLED屏,支持血氧心率监测,50米防水","price": 899.0,"in_stock": true,"categories": ["可穿戴设备", "健康监测"],"tags": ["防水", "长续航", "新品"],"created_at": "2024-03-20T10:00:00Z","rating": 4.7,"reviews": [{"user_id": "U4004","score": 5,"comment": "游泳监测非常准确","created_at": "2024-04-05","responses": [{"admin_id": "ADM003","response_text": "感谢选择我们的产品!"}]},{"user_id": "U3003","score": 5,"comment": "喜欢","created_at": "2024-04-05","responses": [{"admin_id": "ADM003","response_text": "感谢选择我们的产品!"}]}],"warehouse_location": "22.2833,114.1667",  "supplier_info": {"name": "健康科技集团","ip_address": "10.88.10.25"},"attributes": {"颜色": "曜石黑","表带材质": "氟橡胶","电池容量_mAh": 450},"name_translations": {"en": "Health Monitoring Smart Watch","zh": "健康监测智能手表","es": "Taza termo inteligente"}
}

POST /ecommerce_products/_doc/5
{"product_id": "P567890","name": "静音空气净化器","description": "CADR 600m³/h,智能感应PM2.5,适用80㎡空间","price": 2499.0,"in_stock": true,"categories": ["生活家电", "环境电器"],"tags": ["智能感应", "静音"],"created_at": "2023-12-15","rating": 4.3,"reviews": [{"user_id": "U3003","score": 4,"comment": "睡眠模式确实安静","created_at": "2024-01-10","responses": []}],"warehouse_location": { "lat": 30.5728,"lon": 104.0668},  "supplier_info": {"name": "清洁科技公司","ip_address": "172.20.10.5"},"attributes": {"滤芯类型": "复合滤网","噪音分贝": 28,"适用面积_㎡": 80},"name_translations": {"en": "Health Monitoring Smart Watch","zh": "空气净化器","es": "Taza termo inteligente"}
}

POST /ecommerce_products/_doc/6
{"product_id": "P678901","name": "全掌气垫跑步鞋","description": "轻量透气网面,ZOOM AIR缓震技术","price": 699.0,"in_stock": false,"categories": ["运动服饰", "鞋类"],"tags": ["促销", "限量款"],"created_at": "2024-02-28T14:30:00Z","rating": 4.6,"reviews": [{"user_id": "U6006","score": 2,"comment": "尺码偏小建议买大一码","created_at": "2024-03-15","responses": [{"admin_id": "ADM002","response_text": "已反馈给质检部门改进"}]}],"warehouse_location": "23.1291,113.2644",  "supplier_info": {"name": "运动装备制造","ip_address": "192.168.2.200"},"attributes": {"颜色": "荧光绿/黑","尺码": "42","重量_g": 320},"name_translations": {"en": "Health Monitoring Smart Watch","zh": "全掌气垫跑步鞋","es": "Taza termo inteligente"}
}

POST /ecommerce_products/_doc/8
{"product_id": "P890123","name": "4K全景云台摄像头","description": "360°全景追踪,红外夜视,支持AI人形检测","price": 399.0,"in_stock": true,"categories": ["智能硬件", "安防"],"tags": ["夜视", "AI识别"],"created_at": "2024-01-10T08:00:00Z","rating": 4.0,"reviews": [{"user_id": "U8008","score": 2,"comment": "夜间画质有待提升","created_at": "2024-02-01","responses": []}],"warehouse_location": "39.9042,116.4074",  "supplier_info": {"name": "智能安防科技","ip_address": "192.168.5.100"},"attributes": {"原价": "599","存储方式": ["云存储", "本地SD卡"],"夜视距离_米": 15},"name_translations": {"en": "Health Monitoring Smart Watch","zh": "4K全景云台摄像头","es": "Taza termo inteligente"}
}

#查询文档
#GET/索引库名/_doc/文档id
GET /ecommerce_products/_doc/1

#增量修改文档
#POST/索引库名/_update/文档id
POST /ecommerce_products/_update/8
{"doc": {"tags": ["夜视", "AI识别","促销"]}
}

相关性算分专项练习

一、function_score 的核心作用

function_score 允许您通过自定义规则修改文档的原始相关性评分 (_score),实现以下目标:

  • 提升特定文档的排名(如促销商品、高库存商品)

  • 降低无关文档的排名(如过期内容)

  • 完全自定义排序逻辑(如结合价格、评分、时间等多因素)

二、基础语法结构

GET /索引名/_search
{
  "query": {
    "function_score": {
      "query": { ... },       // 主查询(基础结果集)
      "functions": [ ... ],   // 评分函数列表
      "score_mode": "sum",    // 多个函数评分之间的组合方式
      "boost_mode": "sum",    // 函数评分与主查询原始评分的组合方式
      "max_boost": 10.0,      // 函数评分的最大限制
      "min_score": 2.0        // 结果过滤阈值(评分低于此值的文档被排除)
    }
  },
  "sort": [ { "_score": "desc" } ]
}

三、核心参数详解

1. query (主查询)
  • 作用:定义基础查询,决定哪些文档会被处理。

  • 示例:匹配所有文档或特定条件。

"query": { "match_all": {} }
2. functions (评分函数)

支持多种函数类型,可同时使用多个函数:

| 函数类型 | 作用 | 示例 |
| --- | --- | --- |
| weight | 固定权重值 | { "weight": 2.0, "filter": { "term": { ... } } } |
| field_value_factor | 基于字段值的评分计算 | { "field_value_factor": { "field": "rating", "factor": 1.5 } } |
| random_score | 随机评分(需指定种子) | { "random_score": { "seed": 用户ID } } |
| script_score | 自定义脚本计算评分 | { "script_score": { "script": { "source": "doc['price'].value * 0.1" } } } |
| decay | 按距离/时间衰减评分(如高斯衰减) | 见下文详细示例 |
3. score_mode (函数评分组合方式)
| 模式 | 说明 |
| --- | --- |
| sum | 所有函数评分相加 |
| avg | 取平均值 |
| max | 取最大值 |
| min | 取最小值 |
| multiply | 所有函数评分相乘(默认) |
4. boost_mode (最终评分计算方式)
| 模式 | 公式 |
| --- | --- |
| sum | 最终分 = 原始分 + 函数评分 |
| replace | 最终分 = 函数评分 |
| multiply | 最终分 = 原始分 × 函数评分(默认) |
| avg | 最终分 = (原始分 + 函数评分)/2 |

 四、实战练习

#需求:
#搜索所有商品,但为有库存(in_stock=true)的商品评分增加 50% 权重,同时为评分≥4.2 的商品额外增加 30% 权重。最终评分按加权值排序。
GET /ecommerce_products/_search
{"query": {"function_score": {"query": {"match_all": {}},"functions": [{"filter": {"term": {"in_stock": "true"}},"weight": 1.5},{"filter": {"range": {"rating": {"gte": 4.2}}},"weight": 1.3}],"score_mode": "sum","boost_mode": "replace"}},"sort": [ { "_score": "desc" } ]
}

Aggs聚合

大白话的理解就是:

你让他aggs一下type,他就把不同类型type以及数量返回给你

一、聚合的核心概念

聚合(Aggregations) 是 Elasticsearch 中用于对数据进行统计分析的功能,类似于 SQL 的 GROUP BY + 统计函数(如 COUNT、AVG)。
核心用途

  • 数据分组统计(如按分类统计商品数量)

  • 计算指标(如平均价格、最高评分)

  • 多维度交叉分析(如时间+地域的销售分布)

二、基础语法

{"aggs": {"agg_name": {"agg_type": {"field": "field_name","size": 10 (可选参数,具体取决于聚合类型)}}}
}
  • agg_name: 自定义的聚合名称,用于标识聚合结果。
  • agg_type: 聚合的类型,例如 terms、avg、sum 等。
  • field_name: 指定要进行聚合操作的字段。

三、常见聚合类型

Terms Aggregation (按值分组)

  • 用于根据字段的唯一值对文档进行分组。
  • 适用于分析离散值,如标签、类别等。
{"aggs": {"group_by_status": {"terms": {"field": "status.keyword"}}}
}

Range Aggregation (范围分组)

  • 用于根据数值区间对文档进行分组。
{"aggs": {"price_ranges": {"range": {"field": "price","ranges": [{ "to": 100 },{ "from": 100, "to": 200 },{ "from": 200 }]}}}
}

Date Histogram Aggregation (日期直方图)

  • 按时间间隔对文档进行分组。
{"aggs": {"sales_over_time": {"date_histogram": {"field": "sale_date","calendar_interval": "month"}}}
}

Avg, Sum, Min, Max Aggregations (统计计算)

  • 用于计算字段的平均值、总和、最小值、最大值。
{"aggs": {"average_price": {"avg": {"field": "price"}}}
}

Nested Aggregation (嵌套聚合)

  • 在嵌套字段上进行聚合。
{"aggs": {"nested_comments": {"nested": {"path": "comments"},"aggs": {"comment_authors": {"terms": {"field": "comments.author"}}}}}
}

组合聚合

  • 可以将多个聚合嵌套在一起,以得到更复杂的查询。例如,首先对一个字段进行分组,然后对每个分组进行统计计算。
{"aggs": {"group_by_status": {"terms": {"field": "status.keyword"},"aggs": {"average_price": {"avg": {"field": "price"}}}}}
}

四、实战练习

#需求:
#统计每个商品的评论回复率(有回复的评论数 / 总评论数),按回复率降序排列
GET /ecommerce_products/_search
{"size": 0,"aggs": {"products": {"nested": { "path": "reviews" },"aggs": {"total_comments": { "value_count": { "field": "reviews.user_id" } },"replied_comments": {"nested": { "path": "reviews.responses" },"aggs": { "response_count": { "value_count": { "field": "reviews.responses.admin_id" } } }}}}}
}

五、字段详解

bucket_script 

作用:对于已有的聚合结果进行二次计算,应用于计算比例、差值等

基础语法:

{"aggs": {"自定义聚合名称": {"bucket_script": {"buckets_path": {"变量1": "聚合路径1","变量2": "聚合路径2"},"script": "params.变量1 + params.变量2"}}}
}

脚本

doc['field']的通用规则

适用字段类型

  • 适用字段类型:仅适用于非文本字段(如 integer、long、double、date、keyword 等)。

  • 不支持文本字段:如果字段是 text 类型且未设置 fielddata=true,直接访问 doc['text_field'] 会报错。

  • 返回值类型:返回一个 ScriptDocValues 对象(如 ScriptDocValues.Longs、ScriptDocValues.Doubles),需通过特定方法获取值。

常用方法

(1) .value
  • 作用:获取字段的第一个值(单值字段直接取值,多值字段取第一个值)。

  • 示例

// 单值字段:直接返回数值
double score = doc['score'].value;

// 多值字段:返回第一个值(如 [80, 90] 返回 80)
int firstScore = doc['scores'].value;
(2) .values
  • 作用:获取字段的所有值(返回数组形式)。

  • 示例

// 多值字段:返回数组 [80, 90]
double[] scores = doc['scores'].values;

// 遍历多值字段
for (double s : doc['scores'].values) {
    total += s;
}
(3) .size()
  • 作用:获取字段值的数量(单值字段返回 1,多值字段返回实际数量)。

  • 示例

if (doc['score'].size() > 0) {  // 判断字段是否存在且非空
    return doc['score'].value;
} else {
    return 0;
}
(4) .empty
  • 作用:检查字段是否为空(无值)。

  • 示例

if (!doc['score'].empty) {return doc['score'].value;
}
(5) .get(int index)
  • 作用:获取数组字段的指定索引值

  • 示例

// 获取多值字段的第二个值
if (doc['scores'].size() >= 2) {double secondScore = doc['scores'].get(1);
}
(6) .count()
  • 作用:统计字段中非空值的数量(与 .size() 不同,可能过滤空值)。

  • 示例

int validScores = doc['scores'].count();
(7) 聚合方法
  • 适用场景:对多值字段进行聚合计算。

  • 常用方法

.min()  // 最小值
.max()  // 最大值
.sum()  // 总和
.average()  // 平均值
| 方法 | 用途 | 示例场景 |
| --- | --- | --- |
| .value | 获取单值字段值或多值字段的第一个值 | 单值数值计算 |
| .values | 获取多值字段的所有值 | 遍历求和、求平均 |
| .size() | 获取值的数量(包括空值) | 判断字段是否为空 |
| .empty | 检查字段是否为空 | 空值保护逻辑 |
| .get(index) | 获取多值字段的指定索引值 | 访问特定位置的元素 |
| .min()/.max() | 多值字段的最小值/最大值 | 统计范围筛选 |

补充 

1. .size()、.count() 和 .empty 方法

以下是针对 Elasticsearch Painless 脚本中 doc['field'] 的 .size()、.count() 和 .empty 方法在处理 空值、数组、字段不存在 时的具体行为示例。(注意:Elasticsearch 不会索引 null 值,doc values 中也不会保存 null,涉及 null 的计数结果请以所用版本的实际运行结果为准。)

示例数据

假设 Elasticsearch 中有如下文档:

{
  "id": 1,
  "name": "Alice",
  "scores": [85, 90, null],   // 多值字段,包含 null
  "empty_array": [],          // 空数组
  "nullable_field": null      // 单值字段,值为 null
}
  • 注意:字段 missing_field 不存在

Painless 脚本示例

// 检查 scores 字段
def scores_size = doc['scores'].size();      // 返回 3
def scores_count = doc['scores'].count();    // 返回 2(过滤 null)
def scores_empty = doc['scores'].empty;      // 返回 false

// 检查 empty_array 字段
def empty_array_size = doc['empty_array'].size();  // 返回 0
def empty_array_count = doc['empty_array'].count();// 返回 0
def empty_array_empty = doc['empty_array'].empty;  // 返回 true

// 检查 nullable_field 字段
def nullable_size = doc['nullable_field'].size();  // 返回 1(单值 null)
def nullable_count = doc['nullable_field'].count();// 返回 0(过滤 null)
def nullable_empty = doc['nullable_field'].empty;  // 返回 false(字段存在)

// 检查 missing_field 字段
def missing_size = doc['missing_field'].size();    // 抛出异常(字段不存在)
def missing_empty = doc['missing_field'].empty;    // 抛出异常(字段不存在)
| 字段类型 | .size() | .count() | .empty |
| --- | --- | --- | --- |
| 多值带 null(如 scores: [85, 90, null]) | 返回总数量(含 null),示例:3 | 返回非 null 值数量,示例:2 | false(存在且非空数组) |
| 空数组(如 empty_array: []) | 0 | 0 | true(存在但无值) |
| 单值 null(如 nullable_field: null) | 1(单值字段视为数组 [null]) | 0(过滤 null) | false(字段存在) |
| 字段不存在(如 missing_field) | 报错 | 报错 | 报错 |

 2.判空处理

// 正确做法:先检查字段是否存在
if (doc.containsKey('score') && !doc['score'].empty) {return doc['score'].value;
} else {return 0;
}

相关文章:

  • C++ AVL树的实现
  • 《AI大模型应知应会100篇》第38篇:大模型与知识图谱结合的应用模式
  • 【创新实训项目博客】数据库搭建
  • 简单了解Java的I/O流机制与文件读写操作
  • Flink 时态维度表 Join 与缓存机制实战
  • NFC 碰一碰发视频贴牌技术,音频功能的开发实践与技术解析
  • WinForm真入门(17)——NumericUpDown控件详解
  • 全星APQP软件系统:驱动芯片半导体行业研发管理迈向高效与合规新高度
  • 每日算法-250427
  • 【Pandas】pandas DataFrame rtruediv
  • 2025.4.22 JavaScript 常用事件学习笔记
  • 开源财务软件:企业财务数字化转型的有力工具
  • TensorFlow 安装全攻略
  • Shell脚本-until语法结构
  • 香港GPU显卡服务器与GPU云服务器的区别
  • Tomcat的安装与配置
  • 【C++详解】C++入门(二)引用、内联函数、nullptr宏
  • Spark-Streaming核心编程:有状态转化操作与DStream输出
  • 高中数学联赛模拟试题精选第13套几何题
  • 【PyCharm- Python- ArcGIS】:安装一个和 ArcGIS 不冲突的独立 Python让PyCharm 使用 (解决全过程记录)
  • 扎克伯格怕“错过风口”?Meta AI数字伴侣被允许与未成年人讨论不当话题
  • 我国首个核电工业操作系统发布,将在华龙一号新机组全面应用
  • 从地下金库到地上IP,看海昏汉文化“最美变装”
  • 迎接神十九乘组回家,东风着陆场各项工作已准备就绪
  • 公安部知识产权犯罪侦查局:侦破盗录传播春节档院线电影刑案25起
  • 可移动可变形的新型超材料问世