PG中通过GIST创建其他自定义索引
文章目录
- 前言
- 一、文件结构
- 二、编译扩展
- 三、测试索引扩展
- 完全删除扩展
- 测试
- 四、代码介绍
前言
PG中Gist提供了接口,可以让用户自定义相应的接口并继承,在这里我们以创建一个只支持int8类型的btree_gist扩展为例,来详细介绍其定义一个扩展所需要的文件及过程
一、文件结构
1、在contrib目录下创建`btree_gist_int8`索引项目
# Enter the contrib directory of the PostgreSQL source tree.
cd ~/gist/postgres/contrib
# Create the extension's own directory (-p: no error if it already exists).
mkdir -p btree_gist_int8
# Work inside the new extension directory from here on.
cd btree_gist_int8
需要的C代码
核心代码如下:
btree_gist.c
btree_gist.h
btree_int8.c
btree_utils_num.c
btree_utils_num.h
btree_utils_var.h
2、创建Makefile文件:
# contrib/btree_gist_int8/Makefile
#
# Builds the shared library btree_gist_int8 from the three object files
# listed in OBJS and installs the control file and the SQL script next to it.

# Name of the shared library to build from OBJS.
MODULE_big = btree_gist_int8

# One object per C source file that is part of this extension.
OBJS = \
	$(WIN32RES) \
	btree_gist.o \
	btree_int8.o \
	btree_utils_num.o

# Extension name (expects btree_gist_int8.control) and its install script.
EXTENSION = btree_gist_int8
DATA = btree_gist_int8--1.0.sql
PGFILEDESC = "btree_gist - GiST operator class for int8"

# Regression test name.
# NOTE(review): presumably needs sql/int8.sql and expected/int8.out to exist
# before the regression targets can run; a plain "make" does not use it.
REGRESS = int8

# Link against libm only when the configured LIBS already contain -lm.
SHLIB_LINK += $(filter -lm, $(LIBS))

# Build either out of tree through PGXS (make USE_PGXS=1) or inside the
# PostgreSQL source tree under contrib/.
ifdef USE_PGXS
PG_CONFIG = pg_config
PGXS := $(shell $(PG_CONFIG) --pgxs)
include $(PGXS)
else
subdir = contrib/btree_gist_int8
top_builddir = ../..
include $(top_builddir)/src/Makefile.global
include $(top_srcdir)/contrib/contrib-global.mk
endif
3、创建btree_gist_int8.control
控制文件。其中module_pathname(在原btree_gist扩展中为 '$libdir/btree_gist')
是PG扩展控制文件(.control)中的一个设置,它指定了扩展的共享库文件的路径。在实际运行中,PG会将$libdir
替换为实际的库目录路径。例如,如果PG安装在/usr/local/pgsql
,那么$libdir
指向/usr/local/pgsql/lib
。
注意:我们要将module_pathname中的库名改为自己扩展项目的名称,在本例中即 module_pathname = '$libdir/btree_gist_int8'
,完整的控制文件如下:
# btree_gist_int8 extension
# Description stored with the extension.
comment = 'support for indexing common datatypes in GiST'
# Version installed by CREATE EXTENSION when none is requested; matches the
# btree_gist_int8--1.0.sql script name.
default_version = '1.0'
# Shared library path; the SQL script refers to it as MODULE_PATHNAME.
module_pathname = '$libdir/btree_gist_int8'
relocatable = true
trusted = true
4、创建btree_gist_int8--1.0.sql
的SQL
文件,包含只有int8相关的定义:
/* contrib/btree_gist_int8/btree_gist_int8--1.0.sql */

-- complain if script is sourced in psql, rather than via CREATE EXTENSION
\echo Use "CREATE EXTENSION btree_gist_int8" to load this file. \quit

--
-- Index key storage types.
-- The I/O functions are created first, then the type that uses them.
--

CREATE FUNCTION gbtreekey16_in(cstring)
RETURNS gbtreekey16
AS 'MODULE_PATHNAME', 'gbtreekey_in'
LANGUAGE C IMMUTABLE STRICT;

CREATE FUNCTION gbtreekey16_out(gbtreekey16)
RETURNS cstring
AS 'MODULE_PATHNAME', 'gbtreekey_out'
LANGUAGE C IMMUTABLE STRICT;

-- 16-byte key: the STORAGE type of gist_int8_ops below.
CREATE TYPE gbtreekey16 (
	INTERNALLENGTH = 16,
	INPUT  = gbtreekey16_in,
	OUTPUT = gbtreekey16_out
);

CREATE FUNCTION gbtreekey8_in(cstring)
RETURNS gbtreekey8
AS 'MODULE_PATHNAME', 'gbtreekey_in'
LANGUAGE C IMMUTABLE STRICT;

CREATE FUNCTION gbtreekey8_out(gbtreekey8)
RETURNS cstring
AS 'MODULE_PATHNAME', 'gbtreekey_out'
LANGUAGE C IMMUTABLE STRICT;

-- 8-byte key: not referenced by the int8 operator class in this script.
CREATE TYPE gbtreekey8 (
	INTERNALLENGTH = 8,
	INPUT  = gbtreekey8_in,
	OUTPUT = gbtreekey8_out
);

--
-- Distance operator, used for ORDER BY support (strategy 15 below).
--

CREATE FUNCTION int8_dist(int8, int8)
RETURNS int8
AS 'MODULE_PATHNAME'
LANGUAGE C IMMUTABLE STRICT;

CREATE OPERATOR <-> (
	LEFTARG = int8,
	RIGHTARG = int8,
	PROCEDURE = int8_dist,
	COMMUTATOR = '<->'
);

-- Decompress support function registered as FUNCTION 4 of the operator class.
CREATE FUNCTION gbt_decompress(internal)
RETURNS internal
AS 'MODULE_PATHNAME'
LANGUAGE C IMMUTABLE STRICT;

--
--
--
-- int8 ops
--
--
--

-- define the GiST support methods
CREATE FUNCTION gbt_int8_consistent(internal,int8,int2,oid,internal)
RETURNS bool
AS 'MODULE_PATHNAME'
LANGUAGE C IMMUTABLE STRICT;

CREATE FUNCTION gbt_int8_distance(internal,int8,int2,oid,internal)
RETURNS float8
AS 'MODULE_PATHNAME'
LANGUAGE C IMMUTABLE STRICT;

CREATE FUNCTION gbt_int8_compress(internal)
RETURNS internal
AS 'MODULE_PATHNAME'
LANGUAGE C IMMUTABLE STRICT;

CREATE FUNCTION gbt_int8_fetch(internal)
RETURNS internal
AS 'MODULE_PATHNAME'
LANGUAGE C IMMUTABLE STRICT;

CREATE FUNCTION gbt_int8_penalty(internal,internal,internal)
RETURNS internal
AS 'MODULE_PATHNAME'
LANGUAGE C IMMUTABLE STRICT;

CREATE FUNCTION gbt_int8_picksplit(internal, internal)
RETURNS internal
AS 'MODULE_PATHNAME'
LANGUAGE C IMMUTABLE STRICT;

CREATE FUNCTION gbt_int8_union(internal, internal)
RETURNS gbtreekey16
AS 'MODULE_PATHNAME'
LANGUAGE C IMMUTABLE STRICT;

CREATE FUNCTION gbt_int8_same(gbtreekey16, gbtreekey16, internal)
RETURNS internal
AS 'MODULE_PATHNAME'
LANGUAGE C IMMUTABLE STRICT;

-- Create the operator class
CREATE OPERATOR CLASS gist_int8_ops
DEFAULT FOR TYPE int8 USING gist
AS
	OPERATOR	1	<  ,
	OPERATOR	2	<= ,
	OPERATOR	3	=  ,
	OPERATOR	4	>= ,
	OPERATOR	5	>  ,
	FUNCTION	1	gbt_int8_consistent (internal, int8, int2, oid, internal),
	FUNCTION	2	gbt_int8_union (internal, internal),
	FUNCTION	3	gbt_int8_compress (internal),
	FUNCTION	4	gbt_decompress (internal),
	FUNCTION	5	gbt_int8_penalty (internal, internal, internal),
	FUNCTION	6	gbt_int8_picksplit (internal, internal),
	FUNCTION	7	gbt_int8_same (gbtreekey16, gbtreekey16, internal),
	STORAGE		gbtreekey16;

-- Add the <> and distance operators plus the distance and fetch support
-- functions to the operator family created together with the class above.
ALTER OPERATOR FAMILY gist_int8_ops USING gist ADD
	OPERATOR	6	<> (int8, int8) ,
	OPERATOR	15	<-> (int8, int8) FOR ORDER BY pg_catalog.integer_ops ,
	FUNCTION	8 (int8, int8) gbt_int8_distance (internal, int8, int2, oid, internal) ,
	FUNCTION	9 (int8, int8) gbt_int8_fetch (internal) ;
二、编译扩展
在~/gist/postgres/contrib/btree_gist_int8$
中执行
# Compile the extension in the current directory.
make
# Install the built files; sudo because the target directories shown below are under /usr/local/pgsql.
sudo make install
最终得到如下输出,从如下输出可以看到,会将编译好的共享库文件btree_gist_int8.so
复制到PG的
库目录/usr/local/pgsql/lib/
下面。以及将扩展的控制文件btree_gist_int8.control
复制到PG的扩展目录/usr/local/pgsql/share/extension
中。将扩展的SQL脚本文件btree_gist_int8--1.0.sql
复制到PG的扩展目录
。这三个文件共同构成了一个完整的PostgreSQL扩展。
/usr/bin/mkdir -p '/usr/local/pgsql/lib'
/usr/bin/mkdir -p '/usr/local/pgsql/share/extension'
/usr/bin/mkdir -p '/usr/local/pgsql/share/extension'
/usr/bin/install -c -m 755 btree_gist_int8.so '/usr/local/pgsql/lib/btree_gist_int8.so'
/usr/bin/install -c -m 644 ./btree_gist_int8.control '/usr/local/pgsql/share/extension/'
/usr/bin/install -c -m 644 ./btree_gist_int8--1.0.sql '/usr/local/pgsql/share/extension/'
编译后生成的文件为:
btree_gist_int8.so
btree_gist.o
btree_int8.o
btree_utils_num.o
然后在/usr/local/pgsql/lib
中存在btree_gist_int8.so
三、测试索引扩展
连接PostgreSQL数据库/usr/local/pgsql/bin/psql -U postgres
-- Test table: a serial primary key plus the int8 column to be indexed.
CREATE TABLE test_int8 (
    id serial PRIMARY KEY,
    value int8
);

-- Load the values 1 through 10000 into the int8 column.
INSERT INTO test_int8 (value)
SELECT n::int8
FROM generate_series(1, 10000) AS n;
创建GiST索引:先执行CREATE EXTENSION btree_gist_int8;
安装扩展,再执行CREATE INDEX idx_test_int8_gist ON test_int8 USING gist(value);
,即可在test_int8
表上创建一个名为idx_test_int8_gist
的索引,被索引的列为value列
-- Install the extension first: it provides the default GiST operator class for int8.
CREATE EXTENSION btree_gist_int8;
-- Build a GiST index on the int8 column of the test table.
CREATE INDEX idx_test_int8_gist ON test_int8 USING gist(value);
执行各种测试查询
-- Equality lookup on the indexed column.
EXPLAIN ANALYZE
SELECT *
FROM test_int8
WHERE value = 5000;

-- Range lookup on the indexed column.
EXPLAIN ANALYZE
SELECT *
FROM test_int8
WHERE value BETWEEN 4000 AND 6000;
测试索引性能
-- Baseline without the index.  PostgreSQL has no "enable_gist" setting, so
-- discourage index use through the planner method settings instead: the GiST
-- index is the only index on "value", so turning off plain index scans and
-- bitmap scans steers the planner to a sequential scan.
SET enable_indexscan = off;
SET enable_bitmapscan = off;

EXPLAIN ANALYZE
SELECT * FROM test_int8 WHERE value BETWEEN 4000 AND 6000;

-- Allow index-based plans again so the GiST index can be chosen.
SET enable_indexscan = on;
SET enable_bitmapscan = on;

EXPLAIN ANALYZE
SELECT * FROM test_int8 WHERE value BETWEEN 4000 AND 6000;
输出结果
gist_test_db=# CREATE INDEX idx_test_int8_gist ON test_int8 USING gist(value);
CREATE INDEX
gist_test_db=# EXPLAIN ANALYZE
gist_test_db-# SELECT * FROM test_int8 WHERE value = 5000;QUERY PLAN
------------------------------------------------------------------------------------------------------Seq Scan on test_int8 (cost=0.00..180.00 rows=1 width=12) (actual time=0.832..1.633 rows=1 loops=1)Filter: (value = 5000)Rows Removed by Filter: 9999Buffers: shared hit=55Planning:Buffers: shared hit=38Planning Time: 0.223 msExecution Time: 1.660 ms
(8 rows)gist_test_db=#
删除索引,通过如下命令,可以实现保留表但只删除索引和扩展
gist_test_db=# DROP INDEX idx_test_int8_gist;
DROP INDEX
gist_test_db=# DROP EXTENSION btree_gist_int8;
DROP EXTENSION
gist_test_db=#
如果现在PG中并没有该扩展,当你想创建value列的索引时候,会出现如下问题:
gist_test_db=# CREATE INDEX idx_test_int8_gist ON test_int8 USING gist(value);
ERROR: data type bigint has no default operator class for access method "gist"
HINT: You must specify an operator class for the index or define a default operator class for the data type.
gist_test_db=#
完全删除扩展
以删除扩展 btree_gist_int8
为例
-- Drop the extension and, through CASCADE, the objects that depend on it (such as indexes built with it).
DROP EXTENSION IF EXISTS btree_gist_int8 CASCADE;
-- Clean up the key input function in case it still exists outside the extension.
DROP FUNCTION IF EXISTS gbtreekey16_in(cstring) CASCADE;
测试
使用EXPLAIN ANALYZE SELECT * FROM test_data WHERE id = 5000;
,输出结果如下:
gist_test_db=# EXPLAIN ANALYZE SELECT * FROM test_data WHERE id = 5000;QUERY PLAN
---------------------------------------------------------------------------------------------------------------------------Index Scan using test_data_pkey on test_data (cost=0.29..8.30 rows=1 width=17) (actual time=0.068..0.069 rows=1 loops=1)Index Cond: (id = 5000)Buffers: shared hit=3 read=3Planning:Buffers: shared hit=41 read=10 dirtied=1Planning Time: 0.381 msExecution Time: 0.100 ms
(7 rows)
这里using test_data_pkey
说明使用的是名为 test_data_pkey
的索引,PG默认给 PRIMARY KEY(id)
创建的索引就叫这个名。
你可以对相同的 test_data.id
字段建立 GiST 索引(在我们配置好自己的自定义扩展后),命令如下。其中gist_int8_ops
是我们之前在SQL脚本中通过 CREATE OPERATOR CLASS gist_int8_ops
为该扩展定义的操作符类(operator class),请根据自己的定义修改这个名称。
-- Build a GiST index on test_data.id, naming the operator class explicitly.
-- NOTE(review): gist_int8_ops is declared FOR TYPE int8, so this presumably
-- requires id to be bigint; the definition of test_data is not shown - confirm.
CREATE INDEX gist_id_index ON test_data USING gist (id gist_int8_ops);
并执行:
-- Switch off three planner scan methods for this session, then re-run the query.
-- NOTE(review): index scans and bitmap scans are switched off together with
-- sequential scans, which presumably penalises gist_id_index as well as
-- test_data_pkey; confirm that the resulting plan really uses the GiST index.
SET enable_indexscan = off;
SET enable_bitmapscan = off;
SET enable_seqscan = off;
EXPLAIN ANALYZE SELECT * FROM test_data WHERE id = 5000;
四、代码介绍
在btree_gist_int8--1.0.sql
中定义了如下内容
-- SQL-level declaration of the C function gbt_int8_compress.
-- No link symbol is given after 'MODULE_PATHNAME', so the C symbol looked up
-- in the shared library has the same name as the SQL function.
CREATE FUNCTION gbt_int8_compress(internal)
RETURNS internal
AS 'MODULE_PATHNAME'
LANGUAGE C IMMUTABLE STRICT;
然后在btree_int8.c
中声明,下面这些是扩展模块里暴露给 SQL 使用的函数。你可以在 SQL 脚本中 CREATE FUNCTION
时用它们。
/*
** int64 ops
**
** One PG_FUNCTION_INFO_V1 declaration per C function that the extension's
** SQL script binds with CREATE FUNCTION ... LANGUAGE C.
*/
PG_FUNCTION_INFO_V1(gbt_int8_compress);
PG_FUNCTION_INFO_V1(gbt_int8_fetch);
PG_FUNCTION_INFO_V1(gbt_int8_union);
PG_FUNCTION_INFO_V1(gbt_int8_picksplit);
PG_FUNCTION_INFO_V1(gbt_int8_consistent);
PG_FUNCTION_INFO_V1(gbt_int8_distance);
PG_FUNCTION_INFO_V1(gbt_int8_penalty);
PG_FUNCTION_INFO_V1(gbt_int8_same);
Datum
gbt_int8_consistent(PG_FUNCTION_ARGS)
{GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);int64 query = PG_GETARG_INT64(1);StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);/* Oid subtype = PG_GETARG_OID(3); */bool *recheck = (bool *) PG_GETARG_POINTER(4);int64KEY *kkk = (int64KEY *) DatumGetPointer(entry->key);GBT_NUMKEY_R key;/* All cases served by this function are exact */*recheck = false;key.lower = (GBT_NUMKEY *) &kkk->lower;key.upper = (GBT_NUMKEY *) &kkk->upper;PG_RETURN_BOOL(gbt_num_consistent(&key, &query, &strategy,GIST_LEAF(entry), &tinfo, fcinfo->flinfo));
}
然后调用了btree_utils_num.c
中的gbt_num_consistent
函数来执行一致性查询
/*
 * The GiST consistent method
 *
 * Decides whether the index entry described by "key" can match "query"
 * under the operator identified by "*strategy".
 *
 * key      - lower and upper bound of the entry
 * query    - the query value, passed opaquely to the comparison callbacks
 * strategy - pointer to the strategy number being evaluated
 * is_leaf  - true for a leaf entry; for <, = and > the leaf case uses the
 *            strict/exact comparison while the non-leaf case uses the
 *            inclusive range test
 * tinfo    - supplies the type-specific comparison callbacks
 *            (f_gt, f_ge, f_eq, f_le, f_lt)
 * flinfo   - passed through unchanged to every callback
 *
 * Returns the result of the selected comparison; an unrecognised strategy
 * number yields false.
 *
 * Note: we currently assume that no datatypes that use this routine are
 * collation-aware; so we don't bother passing collation through.
 */
bool
gbt_num_consistent(const GBT_NUMKEY_R *key,
				   const void *query,
				   const StrategyNumber *strategy,
				   bool is_leaf,
				   const gbtree_ninfo *tinfo,
				   FmgrInfo *flinfo)
{
	bool		retval;

	switch (*strategy)
	{
		case BTLessEqualStrategyNumber:
			retval = tinfo->f_ge(query, key->lower, flinfo);
			break;
		case BTLessStrategyNumber:
			if (is_leaf)
				retval = tinfo->f_gt(query, key->lower, flinfo);
			else
				retval = tinfo->f_ge(query, key->lower, flinfo);
			break;
		case BTEqualStrategyNumber:
			if (is_leaf)
				retval = tinfo->f_eq(query, key->lower, flinfo);
			else
				retval = (tinfo->f_le(key->lower, query, flinfo) &&
						  tinfo->f_le(query, key->upper, flinfo));
			break;
		case BTGreaterStrategyNumber:
			if (is_leaf)
				retval = tinfo->f_lt(query, key->upper, flinfo);
			else
				retval = tinfo->f_le(query, key->upper, flinfo);
			break;
		case BTGreaterEqualStrategyNumber:
			retval = tinfo->f_le(query, key->upper, flinfo);
			break;
		case BtreeGistNotEqualStrategyNumber:
			/* Only an entry whose both bounds equal the query is rejected. */
			retval = (!(tinfo->f_eq(query, key->lower, flinfo) &&
						tinfo->f_eq(query, key->upper, flinfo)));
			break;
		default:
			retval = false;
	}

	return retval;
}