当前位置：首页 > news >正文

Postgresql源码（143）统计信息基础知识（带实例）

news 2025/7/11 15:51:26

概念与总结

高频值（Most Common Values, MCV）
- 存储在 most_common_vals 中。
- 每个高频值的频率通过 most_common_freqs 单独记录（例如 0.010966667 等）。
- MCV 用于优化等值查询（如 poid = 33），直接通过频率计算选择率，无需依赖直方图。
直方图（Histogram Bounds）
- 仅覆盖非高频值（未出现在MCV中的值）的分布。
- 用于优化范围查询（如poid BETWEEN 1050 AND 1200），通过分桶插值估算选择率。
correlation：物理与逻辑顺序相关性
- correlation 表示列值的物理存储顺序与逻辑顺序（升序/降序）的线性相关性。
- 取值范围为 [-1, 1]
- 1：完全正相关（物理顺序与逻辑顺序一致，如自增主键）。
- -1：完全负相关（物理顺序与逻辑顺序相反）
- 0：无相关性（随机存储）。

实例一：3000万高频重复值（0-999）

CREATE TABLE ii (poid INT NOT NULL, value NUMERIC, status int);
-- 分布均匀的话，采样会非常准确。
insert into ii select t.i%1000, t.i, 0 from generate_series(1,30000000) t(i);
CREATE INDEX idx_n_poid ON ii(poid);
analyze ii;

统计信息

postgres=# select * from pg_stats where tablename='ii' and attname='poid';
-[ RECORD 1 ]----------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
schemaname             | public
tablename              | ii
attname                | poid
inherited              | f
null_frac              | 0
avg_width              | 4
n_distinct             | 1000
most_common_vals       | {334,580,881,431,2,33,79,112}
most_common_freqs      | {0.0016,0.0015333333,0.0015333333,0.0015,0.0014666667,0.0014666667,0.0014666667,0.0014666667}
histogram_bounds       | {0,10,20,30,41,52,61,71,82,91,99,109,119,129,139,149,159,168,178,189,197,207,217,226,236,246,256,266,276,286,296,306,315,325,336,346,356,365,374,386,396,406,417,427,438,448,458,468,478,488,498,508,518,528,537,549,558,569,578,590,599,609,619,629,639,649,659,669,679,687,697,707,719,729,740,750,760,771,781,791,800,810,820,829,839,849,859,869,880,891,901,910,921,931,941,950,961,970,980,990,999}
correlation            | 0.002622716
most_common_elems      |
most_common_elem_freqs |
elem_count_histogram   |

postgres=# select count(distinct poid) from ii;
 count
-------
  1000
(1 row)

postgres=# select count(1) from ii;
  count
----------
 30000000
(1 row)

默认采样行数：300 × default_statistics_target（默认default_statistics_target=100，即采样30000行）
n_distinct = 1000
- 唯一值个数与真实值相等，分布均匀的情况下采样最准。
most_common_vals = {334,580,881,431,2,33,79,112}
- 数据均匀分布，随机采样3万行后，各MCV的频率也非常接近（约0.0015，仅略高于平均频率0.001）
histogram_bounds = {0,10,20,30,41,52,61,71,82,91,99,109,119,129,139,149,159,168,178,189,197,207,217,226,236,246,256,266,276,286,296,306,315,325,336,346,356,365,374,386,396,406,417,427,438,448,458,468,478,488,498,508,518,528,537,549,558,569,578,590,599,609,619,629,639,649,659,669,679,687,697,707,719,729,740,750,760,771,781,791,800,810,820,829,839,849,859,869,880,891,901,910,921,931,941,950,961,970,980,990,999}
- 直方图看起来比较平均，因为MCV比较少。

实例二：2800万高频重复值（0-99） + 200万个低频重复值（1000-1899）

CREATE TABLE id (poid INT NOT NULL, value NUMERIC, status int);
INSERT INTO id SELECT t%100, t, 0 FROM generate_series(1, 28000000) t;
INSERT INTO id SELECT 1000 + (t%900), t, 0 FROM generate_series(28000001, 30000000) t;
CREATE INDEX idx_id_poid ON id(poid);
analyze id;

统计信息

postgres=# select * from pg_stats where tablename='id' and attname='poid';
-[ RECORD 1 ]----------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
schemaname             | public
tablename              | id
attname                | poid
inherited              | f
null_frac              | 0
avg_width              | 4
n_distinct             | 901
most_common_vals       | {33,61,96,2,27,17,41,85,40,59,62,65,93,6,15,46,47,71,54,48,88,44,67,75,79,95,18,21,45,68,82,7,72,91,52,49,94,11,34,22,23,25,38,5,74,30,43,55,4,8,35,1,53,56,92,12,39,51,64,86,89,20,87,0,24,99,10,32,19,57,80,81,31,50,70,90,9,66,73,3,78,97,69,77,58,37,42,28,76,98,84,13,63,14,60,26,16,36,29,83}
most_common_freqs      | {0.010966667,0.010933333,0.010866666,0.010533334,0.0104,0.0102,0.0102,0.0101666665,0.0101,0.010066667,0.010066667,0.009966667,0.009966667,0.009833333,0.009833333,0.009833333,0.009833333,0.009833333,0.0098,0.009766666,0.009766666,0.009733333,0.009733333,0.0097,0.0097,0.0097,0.009666666,0.009666666,0.009666666,0.009666666,0.009666666,0.009633333,0.009633333,0.009633333,0.009566667,0.009533334,0.009533334,0.0095,0.0095,0.009466667,0.009433334,0.009433334,0.009433334,0.0094,0.0094,0.009366667,0.009366667,0.009366667,0.009333333,0.009333333,0.009333333,0.0093,0.0093,0.0093,0.0093,0.009266667,0.009266667,0.009266667,0.009266667,0.009266667,0.009266667,0.009233333,0.009233333,0.0092,0.009166666,0.009166666,0.009133333,0.009133333,0.0090333335,0.009,0.008966667,0.008966667,0.008933334,0.008933334,0.008933334,0.008933334,0.0089,0.0089,0.0089,0.008866667,0.008866667,0.008866667,0.0088,0.008733333,0.0087,0.008666666,0.008633333,0.0086,0.0086,0.0086,0.008566666,0.008533333,0.008533333,0.0085,0.0085,0.008333334,0.0083,0.008266667,0.0082,0.0079}
histogram_bounds       | {1000,1005,1019,1030,1036,1047,1056,1064,1072,1080,1089,1100,1110,1122,1132,1140,1149,1158,1168,1178,1188,1195,1208,1215,1226,1238,1245,1255,1264,1275,1286,1292,1299,1309,1316,1323,1332,1339,1347,1355,1368,1378,1390,1399,1407,1418,1430,1438,1448,1453,1462,1472,1483,1492,1507,1516,1524,1536,1543,1549,1556,1562,1572,1578,1586,1593,1604,1611,1621,1628,1635,1646,1654,1663,1673,1683,1693,1704,1709,1718,1727,1734,1745,1751,1758,1766,1776,1785,1791,1798,1806,1814,1825,1834,1841,1851,1861,1872,1883,1893,1899}
correlation            | 0.19235307
most_common_elems      |
most_common_elem_freqs |
elem_count_histogram   |

postgres=# select count(distinct poid) from id;
 count
-------
  1000
(1 row)

postgres=# select count(1) from id;
  count
----------
 30000000
(1 row)

n_distinct = 901
- 评估的唯一值个数比真实的少了99个，因为数据分布不均匀了
- 采样是30000行，但采集到0-99的概率：采集到1000-1899的概率=2800：200=14：1，即样本中低频值只有约2000行，900个低频值平均每个只出现2次左右
- 所以这里会比真实值少一些。
most_common_vals = {33,61,96,2,27,17,41,85,40,59,62,65,93,6,15,46,47,71,54,48,88,44,67,75,79,95,18,21,45,68,82,7,72,91,52,49,94,11,34,22,23,25,38,5,74,30,43,55,4,8,35,1,53,56,92,12,39,51,64,86,89,20,87,0,24,99,10,32,19,57,80,81,31,50,70,90,9,66,73,3,78,97,69,77,58,37,42,28,76,98,84,13,63,14,60,26,16,36,29,83}
- 为什么MCV的个数比实例一多很多？
histogram_bounds = {1000,1005,1019,1030,1036,1047,1056,1064,1072,1080,1089,1100,1110,1122,1132,1140,1149,1158,1168,1178,1188,1195,1208,1215,1226,1238,1245,1255,1264,1275,1286,1292,1299,1309,1316,1323,1332,1339,1347,1355,1368,1378,1390,1399,1407,1418,1430,1438,1448,1453,1462,1472,1483,1492,1507,1516,1524,1536,1543,1549,1556,1562,1572,1578,1586,1593,1604,1611,1621,1628,1635,1646,1654,1663,1673,1683,1693,1704,1709,1718,1727,1734,1745,1751,1758,1766,1776,1785,1791,1798,1806,1814,1825,1834,1841,1851,1861,1872,1883,1893,1899}
- 直方图显示了非MCV值的分布情况，符合预期。

实例一的MCV只有8个值，实例二MCV个数远大于实例一，差异的原因？

实例一（均匀分布）：poid 列通过 t.i%1000 生成，每个值重复约 30,000 次（30,000,000 行 / 1000 唯一值）。由于分布均匀，MCV之间的频率差异极小。PostgreSQL只会选择《频率显著高于平均值》的值作为 MCV。理想情况下应该一个都选不出来，但由于随机采样，这几个值是因为在采样中略微高频，所以被选出来了。所以这里数量少的原因是，数据太平均了，选出来的比较少。
实例二（非均匀分布）：poid 列分为 100 个高频值（每个重复 280,000 次）和 900 个低频值（每个重复约 2,222 次）。高频值的频率（约 0.01）远高于低频值（约 0.00007，即 2,222 / 30,000,000），PostgreSQL 将《所有高频值》存入 most_common_vals。默认 default_statistics_target=100 时，MCV 列表最多保存 100 个值，正好能容纳这 100 个高频值，所以列表被填满。

实例三：2999万高频重复值（0-49） + 1万个低频重复值（1000-1949）


-- 50个值高频重复值（2999万）+950个低频唯一值（1万）
CREATE TABLE iee (poid INT NOT NULL, value NUMERIC, status int);
INSERT INTO iee SELECT t%50, t, 0 FROM generate_series(1, 29990000) t;
INSERT INTO iee SELECT 1000 + (t%950), t, 0 FROM generate_series(29990001, 30000000) t;
CREATE INDEX idx_iee_poid ON iee(poid);
analyze iee;

统计信息

postgres=# select * from pg_stats where tablename='iee' and attname='poid';
-[ RECORD 1 ]----------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
schemaname             | public
tablename              | iee
attname                | poid
inherited              | f
null_frac              | 0
avg_width              | 4
n_distinct             | 56
most_common_vals       | {26,3,41,1,20,38,15,37,8,22,46,27,32,48,29,7,43,30,44,12,16,42,5,25,40,24,34,39,6,49,0,11,28,21,18,14,17,33,47,35,31,2,4,10,45,9,19,23,13,36}
most_common_freqs      | {0.021866666,0.0215,0.021233333,0.021066668,0.021,0.020966666,0.020766666,0.020766666,0.0206,0.0206,0.0206,0.020433333,0.020366667,0.020333333,0.0203,0.020266667,0.020266667,0.0202,0.020166667,0.020133333,0.0201,0.020066667,0.020033333,0.020033333,0.02,0.019933334,0.019933334,0.019933334,0.019866666,0.019866666,0.0198,0.0198,0.019766666,0.019633334,0.0196,0.019566666,0.019533332,0.019533332,0.0195,0.019466667,0.019433333,0.019366667,0.0193,0.019266667,0.0191,0.019033333,0.018933333,0.018833334,0.0187,0.018433332}
histogram_bounds       | {1029,1243,1267,1378,1419,1797}
correlation            | 0.026665932
most_common_elems      |
most_common_elem_freqs |
elem_count_histogram   |

postgres=# select count(distinct poid) from iee;
 count
-------
  1000
(1 row)

postgres=# select count(1) from iee;
  count
----------
 30000000
(1 row)

n_distinct = 56
- 唯一值的偏差更大了
- 因为50个高频值占据了2999万行数据，采样的30000行几乎全部落在这50个值上；低频数据只有1万行，期望只能采到约10行（30000 × 1万 / 3000万）。样本中实际只出现了6个低频值（对应直方图的6个边界），所以这里n_distinct只有 50 + 6 = 56 个。
most_common_vals = {26,3,41,1,20,38,15,37,8,22,46,27,32,48,29,7,43,30,44,12,16,42,5,25,40,24,34,39,6,49,0,11,28,21,18,14,17,33,47,35,31,2,4,10,45,9,19,23,13,36}
histogram_bounds = {1029,1243,1267,1378,1419,1797}
- 直方图的值少了很多，因为采样到最后1万的概率会很低。

附其他测试数据

drop table mm;
CREATE TABLE mm (poid int primary key, edata date, ooid int);
CREATE INDEX idx_mm_1 ON mm(edata);
insert into mm select t.i, '2025-01-01 10:00:00',100 from generate_series(1,200) t(i);
insert into mm select t.i, '2025-01-01 11:00:00',130 from generate_series(201,400) t(i);
insert into mm select t.i, '2025-01-01 12:00:00',100 from generate_series(401,600) t(i);
insert into mm select t.i, '2025-01-01 13:00:00',130 from generate_series(601,800) t(i);
analyze mm;

CREATE TABLE ii (poid INT NOT NULL, value NUMERIC, status int);
-- 分布均匀的话，采样会非常准确。
insert into ii select t.i%1000, t.i, 0 from generate_series(1,30000000) t(i);
CREATE INDEX idx_n_poid ON ii(poid);
analyze ii;

postgres=# select * from pg_stats where tablename='ii' and attname='poid';
-[ RECORD 1 ]----------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
schemaname             | public
tablename              | ii
attname                | poid
inherited              | f
null_frac              | 0
avg_width              | 4
n_distinct             | 1000
most_common_vals       | {334,580,881,431,2,33,79,112}
most_common_freqs      | {0.0016,0.0015333333,0.0015333333,0.0015,0.0014666667,0.0014666667,0.0014666667,0.0014666667}
histogram_bounds       | {0,10,20,30,41,52,61,71,82,91,99,109,119,129,139,149,159,168,178,189,197,207,217,226,236,246,256,266,276,286,296,306,315,325,336,346,356,365,374,386,396,406,417,427,438,448,458,468,478,488,498,508,518,528,537,549,558,569,578,590,599,609,619,629,639,649,659,669,679,687,697,707,719,729,740,750,760,771,781,791,800,810,820,829,839,849,859,869,880,891,901,910,921,931,941,950,961,970,980,990,999}
correlation            | 0.002622716
most_common_elems      |
most_common_elem_freqs |
elem_count_histogram   |

postgres=# select count(distinct poid) from ii;
 count
-------
  1000
(1 row)

postgres=# select count(1) from ii;
  count
----------
 30000000
(1 row)

-- 100个值高频重复值（2800万）+900个低频重复值（200万）
CREATE TABLE id (poid INT NOT NULL, value NUMERIC, status int);
INSERT INTO id SELECT t%100, t, 0 FROM generate_series(1, 28000000) t;
INSERT INTO id SELECT 1000 + (t%900), t, 0 FROM generate_series(28000001, 30000000) t;
CREATE INDEX idx_id_poid ON id(poid);
analyze id;

postgres=# select * from pg_stats where tablename='id' and attname='poid';
-[ RECORD 1 ]----------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
schemaname             | public
tablename              | id
attname                | poid
inherited              | f
null_frac              | 0
avg_width              | 4
n_distinct             | 901
most_common_vals       | {33,61,96,2,27,17,41,85,40,59,62,65,93,6,15,46,47,71,54,48,88,44,67,75,79,95,18,21,45,68,82,7,72,91,52,49,94,11,34,22,23,25,38,5,74,30,43,55,4,8,35,1,53,56,92,12,39,51,64,86,89,20,87,0,24,99,10,32,19,57,80,81,31,50,70,90,9,66,73,3,78,97,69,77,58,37,42,28,76,98,84,13,63,14,60,26,16,36,29,83}
most_common_freqs      | {0.010966667,0.010933333,0.010866666,0.010533334,0.0104,0.0102,0.0102,0.0101666665,0.0101,0.010066667,0.010066667,0.009966667,0.009966667,0.009833333,0.009833333,0.009833333,0.009833333,0.009833333,0.0098,0.009766666,0.009766666,0.009733333,0.009733333,0.0097,0.0097,0.0097,0.009666666,0.009666666,0.009666666,0.009666666,0.009666666,0.009633333,0.009633333,0.009633333,0.009566667,0.009533334,0.009533334,0.0095,0.0095,0.009466667,0.009433334,0.009433334,0.009433334,0.0094,0.0094,0.009366667,0.009366667,0.009366667,0.009333333,0.009333333,0.009333333,0.0093,0.0093,0.0093,0.0093,0.009266667,0.009266667,0.009266667,0.009266667,0.009266667,0.009266667,0.009233333,0.009233333,0.0092,0.009166666,0.009166666,0.009133333,0.009133333,0.0090333335,0.009,0.008966667,0.008966667,0.008933334,0.008933334,0.008933334,0.008933334,0.0089,0.0089,0.0089,0.008866667,0.008866667,0.008866667,0.0088,0.008733333,0.0087,0.008666666,0.008633333,0.0086,0.0086,0.0086,0.008566666,0.008533333,0.008533333,0.0085,0.0085,0.008333334,0.0083,0.008266667,0.0082,0.0079}
histogram_bounds       | {1000,1005,1019,1030,1036,1047,1056,1064,1072,1080,1089,1100,1110,1122,1132,1140,1149,1158,1168,1178,1188,1195,1208,1215,1226,1238,1245,1255,1264,1275,1286,1292,1299,1309,1316,1323,1332,1339,1347,1355,1368,1378,1390,1399,1407,1418,1430,1438,1448,1453,1462,1472,1483,1492,1507,1516,1524,1536,1543,1549,1556,1562,1572,1578,1586,1593,1604,1611,1621,1628,1635,1646,1654,1663,1673,1683,1693,1704,1709,1718,1727,1734,1745,1751,1758,1766,1776,1785,1791,1798,1806,1814,1825,1834,1841,1851,1861,1872,1883,1893,1899}
correlation            | 0.19235307
most_common_elems      |
most_common_elem_freqs |
elem_count_histogram   |

postgres=# select count(distinct poid) from id;
 count
-------
  1000
(1 row)

postgres=# select count(1) from id;
  count
----------
 30000000
(1 row)

-- 50个值高频重复值（2970万）+950个低频唯一值（30万）
CREATE TABLE ie (poid INT NOT NULL, value NUMERIC, status int);
INSERT INTO ie SELECT t%50, t, 0 FROM generate_series(1, 29700000) t;
INSERT INTO ie SELECT 1000 + (t%950), t, 0 FROM generate_series(29700001, 30000000) t;
CREATE INDEX idx_ie_poid ON ie(poid);
analyze ie;

postgres=# select * from pg_stats where tablename='ie' and attname='poid';
-[ RECORD 1 ]----------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
schemaname             | public
tablename              | ie
attname                | poid
inherited              | f
null_frac              | 0
avg_width              | 4
n_distinct             | 318
most_common_vals       | {20,18,44,10,13,9,24,19,29,47,12,30,32,49,48,5,25,2,39,6,7,14,36,43,26,35,16,42,38,31,0,46,4,8,23,11,15,37,41,34,17,45,22,28,21,40,1,3,33,27}
most_common_freqs      | {0.021433333,0.021133333,0.021066668,0.021,0.020933334,0.0209,0.020866666,0.020833334,0.020766666,0.0207,0.0206,0.0205,0.0205,0.0204,0.020333333,0.020266667,0.020266667,0.020166667,0.020166667,0.020133333,0.020133333,0.020133333,0.02,0.02,0.019866666,0.019766666,0.019733334,0.019733334,0.019666666,0.019566666,0.019533332,0.019533332,0.019366667,0.019366667,0.019366667,0.0193,0.019233333,0.019166667,0.0191,0.0189,0.018833334,0.018833334,0.018733334,0.0187,0.0186,0.0185,0.0184,0.0184,0.0183,0.0176}
histogram_bounds       | {1003,1015,1024,1030,1040,1051,1082,1090,1101,1114,1118,1132,1137,1149,1164,1169,1173,1183,1191,1197,1204,1209,1225,1234,1250,1257,1272,1282,1288,1293,1300,1303,1312,1325,1332,1347,1361,1377,1392,1409,1415,1419,1427,1431,1440,1444,1448,1455,1462,1482,1490,1505,1514,1524,1529,1535,1539,1544,1558,1567,1575,1581,1584,1592,1597,1613,1620,1625,1639,1653,1669,1673,1681,1688,1698,1699,1707,1721,1727,1737,1755,1763,1768,1786,1795,1804,1810,1818,1822,1840,1856,1859,1869,1882,1890,1898,1902,1904,1917,1925,1943}
correlation            | 0.057023432
most_common_elems      |
most_common_elem_freqs |
elem_count_histogram   |

postgres=# select count(distinct poid) from ie;
 count
-------
  1000
(1 row)

postgres=# select count(1) from ie;
  count
----------
 30000000
(1 row)

postgres=# select * from ie limit 10;
 poid | value | status
------+-------+--------
    1 |     1 |      0
    2 |     2 |      0
    3 |     3 |      0
    4 |     4 |      0
    5 |     5 |      0
    6 |     6 |      0
    7 |     7 |      0
    8 |     8 |      0
    9 |     9 |      0
   10 |    10 |      0
(10 rows)

-- 50个值高频重复值（2970万）+950个低频唯一值（30万）+ 随机分布
CREATE TABLE ir (poid INT NOT NULL, value NUMERIC, status int);
INSERT INTO ir SELECT t%50, t, 0 FROM generate_series(1, 29700000) t ORDER BY random();
INSERT INTO ir SELECT 1000 + (t%950), t, 0 FROM generate_series(29700001, 30000000) t ORDER BY random();
CREATE INDEX idx_ir_poid ON ir(poid);
analyze ir;

postgres=# select * from pg_stats where tablename='ir' and attname='poid';
-[ RECORD 1 ]----------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
schemaname             | public
tablename              | ir
attname                | poid
inherited              | f
null_frac              | 0
avg_width              | 4
n_distinct             | 330
most_common_vals       | {9,29,14,34,25,46,17,39,21,1,18,45,49,27,38,41,42,36,30,11,35,16,47,2,15,40,0,31,24,37,19,44,48,23,3,4,13,6,32,33,43,12,26,10,28,8,22,5,20,7}
most_common_freqs      | {0.021433333,0.021233333,0.021,0.0209,0.020833334,0.0208,0.020766666,0.020733334,0.020666666,0.020433333,0.020433333,0.020366667,0.020366667,0.0203,0.0203,0.0202,0.0202,0.020166667,0.020133333,0.0201,0.020066667,0.020033333,0.020033333,0.02,0.019833334,0.0198,0.019733334,0.0197,0.0196,0.0196,0.019566666,0.019566666,0.019566666,0.019466667,0.019433333,0.019366667,0.0193,0.019233333,0.019233333,0.019133333,0.0191,0.019033333,0.018833334,0.018733334,0.0187,0.018533334,0.018466666,0.018366667,0.018333333,0.017566666}
histogram_bounds       | {1001,1006,1014,1017,1024,1040,1046,1058,1063,1073,1086,1098,1111,1117,1135,1142,1146,1158,1165,1175,1180,1189,1199,1204,1215,1232,1247,1250,1262,1273,1281,1286,1296,1299,1301,1314,1322,1325,1332,1336,1343,1353,1360,1374,1385,1404,1418,1434,1442,1456,1466,1476,1481,1487,1502,1511,1515,1518,1539,1550,1562,1573,1580,1583,1603,1621,1642,1659,1666,1675,1682,1690,1693,1703,1709,1726,1744,1751,1758,1762,1773,1778,1782,1787,1791,1803,1810,1830,1835,1853,1874,1880,1882,1895,1897,1907,1916,1927,1933,1938,1949}
correlation            | 0.057593007
most_common_elems      |
most_common_elem_freqs |
elem_count_histogram   |

postgres=# select count(distinct poid) from ir;
 count
-------
  1000
(1 row)

postgres=# select count(1) from ir;
  count
----------
 30000000
(1 row)

postgres=# select * from ir limit 10;
 poid |  value   | status
------+----------+--------
    0 | 26165250 |      0
   43 | 21564993 |      0
    2 |  4975852 |      0
   47 | 28519047 |      0
   37 | 12294337 |      0
    3 | 15888653 |      0
   18 | 18510218 |      0
    0 | 16988450 |      0
   42 | 24585492 |      0
   32 | 19939732 |      0
(10 rows)

-- 50个值高频重复值（2999万）+950个低频唯一值（1万）
CREATE TABLE iee (poid INT NOT NULL, value NUMERIC, status int);
INSERT INTO iee SELECT t%50, t, 0 FROM generate_series(1, 29990000) t;
INSERT INTO iee SELECT 1000 + (t%950), t, 0 FROM generate_series(29990001, 30000000) t;
CREATE INDEX idx_iee_poid ON iee(poid);
analyze iee;

postgres=# select * from pg_stats where tablename='iee' and attname='poid';
-[ RECORD 1 ]----------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
schemaname             | public
tablename              | iee
attname                | poid
inherited              | f
null_frac              | 0
avg_width              | 4
n_distinct             | 56
most_common_vals       | {26,3,41,1,20,38,15,37,8,22,46,27,32,48,29,7,43,30,44,12,16,42,5,25,40,24,34,39,6,49,0,11,28,21,18,14,17,33,47,35,31,2,4,10,45,9,19,23,13,36}
most_common_freqs      | {0.021866666,0.0215,0.021233333,0.021066668,0.021,0.020966666,0.020766666,0.020766666,0.0206,0.0206,0.0206,0.020433333,0.020366667,0.020333333,0.0203,0.020266667,0.020266667,0.0202,0.020166667,0.020133333,0.0201,0.020066667,0.020033333,0.020033333,0.02,0.019933334,0.019933334,0.019933334,0.019866666,0.019866666,0.0198,0.0198,0.019766666,0.019633334,0.0196,0.019566666,0.019533332,0.019533332,0.0195,0.019466667,0.019433333,0.019366667,0.0193,0.019266667,0.0191,0.019033333,0.018933333,0.018833334,0.0187,0.018433332}
histogram_bounds       | {1029,1243,1267,1378,1419,1797}
correlation            | 0.026665932
most_common_elems      |
most_common_elem_freqs |
elem_count_histogram   |

postgres=# select count(distinct poid) from iee;
 count
-------
  1000
(1 row)

postgres=# select count(1) from iee;
  count
----------
 30000000
(1 row)