Alexey Vasiliev, Railsware
Brought to you by Alexey Vasiliev, Railsware
PostgreSQL ("Postgres") - is an object-relational database management system (ORDBMS) with an emphasis on extensibility and standards-compliance. Based on the SQL language and supports many of the features of the standard SQL:2011. PostgreSQL evolved from the Ingres project at the University of California, Berkeley. In 1982 the leader of the Ingres team, Michael Stonebraker, left Berkeley to make a proprietary version of Ingres. He returned to Berkeley in 1985 and started a post-Ingres project
# CREATE TABLE xmltest (
data xml NOT NULL
);
# INSERT INTO xmltest (data) VALUES ('<attendee><bio>
<name>John Doe</name>
<birthYear>1986</birthYear></bio><languages>
<lang level="5">php</lang><lang level="4">python</lang>
<lang level="2">java</lang></languages></attendee>');
# SELECT data FROM xmltest WHERE
CAST (xpath('//bio/name/text()', data) as text[]) = '{John Doe}';
data
--------------------------------
<attendee> +
<bio> +
<name>John Doe</name> +
<birthYear>1986</birthYear> +
</bio> +
...
(1 row)
# EXPLAIN SELECT data FROM xmltest WHERE
CAST (xpath('//bio/name/text()', data) as text[]) = '{John Doe}';
QUERY PLAN
--------------------------------------------------
Seq Scan on xmltest (cost=0.00..29.05 rows=6 width=32)
Filter: ((xpath('//bio/name/text()'::text, data,
'{}'::text[]))::text[] = '{"John Doe"}'::text[])
(2 rows)
# CREATE INDEX data_bio_name_idx ON xmltest
USING btree (CAST (xpath('//bio/name/text()', data) as text[]));
CREATE INDEX
# EXPLAIN SELECT data FROM xmltest WHERE
CAST (xpath('//bio/name/text()', data) as text[]) = '{John Doe}';
QUERY PLAN
--------------------------------------------------------------------------------
Index Scan using data_bio_name_idx on xmltest (cost=0.13..8.15 rows=1 width=32)
Index Cond: ((xpath('//bio/name/text()'::text, data,
'{}'::text[]))::text[] = '{"John Doe"}'::text[])
(2 rows)
HStore is a key value store within PostgreSQL
# CREATE EXTENSION hstore;
CREATE EXTENSION
# SELECT 'a=>1,a=>2'::hstore;
hstore
----------
"a"=>"1"
(1 row)
# CREATE TABLE products (
id serial PRIMARY KEY,
name varchar,
attributes hstore
);
# INSERT INTO products
(name, attributes)
VALUES (
'Geek Love: A Novel',
'author => "Katherine Dunn",
pages => 368,
category => fiction'
), (
'Leica M9',
'manufacturer => Leica,
type => camera,
megapixels => 18,
sensor => "full-frame 35mm"'
), ...
# SELECT name, attributes->'pages' as page FROM products
WHERE attributes ? 'pages';
name | page
--------------------+------
Geek Love: A Novel | 368
(1 row)
# SELECT name, attributes->'manufacturer' as manufacturer FROM products
WHERE attributes->'type' = 'computer';
name | manufacturer
----------------+--------------
MacBook Air 11 | Apple
(1 row)
# CREATE INDEX products_hstore_gist_index ON products
USING GIST (attributes);
# CREATE INDEX products_hstore_gin_index ON products
USING GIN (attributes);
# CREATE INDEX product_manufacturer ON products
((products.attributes->'manufacturer'));
# EXPLAIN SELECT name, attributes->'manufacturer' as manufacturer
FROM products WHERE attributes @> hstore('type', 'computer');
QUERY PLAN
--------------------------------------------------------------
Index Scan using products_hstore_index on products
(cost=0.13..8.15 rows=1 width=46)
Index Cond: (attributes @> '"type"=>"computer"'::hstore)
(2 rows)
CREATE OR REPLACE FUNCTION
get_json_key(structure JSON, key TEXT) RETURNS TEXT
AS $get_json_key$
var js_object = structure;
if (typeof ej != 'object')
return NULL;
return JSON.stringify(js_object[key]);
$get_json_key$
IMMUTABLE STRICT LANGUAGE plv8;
# CREATE TABLE blog {
post json
}
# CREATE INDEX post_pk_idx ON
blog((get_json_key(post, ‘post_id’)::BIGINT));
# CREATE INDEX post_date_idx ON
blog((get_json_key(post, ‘post_date’)::TIMESTAMPTZ));
# CREATE OR REPLACE FUNCTION valid_json(json text) RETURNS BOOLEAN AS $$
try {
JSON.parse(json); return true;
} catch(e) {
return false;
}
$$ LANGUAGE plv8 IMMUTABLE STRICT;
# CREATE DOMAIN json AS TEXT CHECK(valid_json(VALUE));
# CREATE TABLE members ( id SERIAL, profile json );
# INSERT INTO members VALUES('not good json');
ERROR: value for domain json
violates check constraint "json_check"
# INSERT INTO members VALUES('{"good": "json", "is": true}');
INSERT 0 1
The only difference between json and jsonb is their storage:
There are 3 major consequences of this:
# SELECT '{"name": "Marie", "age": 45}'::jsonb ||
'{"city": "Paris"}'::jsonb;
?column?
-----------------------------------------------
{"age": 45, "city": "Paris", "name": "Marie"}
(1 row)
# CREATE TABLE integrations (id UUID, data JSONB);
# INSERT INTO integrations(id, data) VALUES (
uuid_generate_v4(),
'{
"service": "salesforce",
"id": "AC347D212341XR",
"email": "test@example.com",
"occurred_at": "8/14/16 11:00:00",
"added": {
"lead_score": 50
}
}');
# CREATE INDEX idx_integrations_gist_data
ON integrations USING GIST(data);
# CREATE INDEX idx_integrations_gin_data
ON integrations USING GIN(data);
# CREATE TABLE measurements (
tick BIGSERIAL PRIMARY KEY,
value_1 INTEGER,
value_2 INTEGER,
value_3 INTEGER,
scientist_id BIGINT
);
When To Avoid JSONB In A PostgreSQL Schema
# SELECT lab_name, COUNT(*) FROM (SELECT scientist_id
FROM measurements WHERE value_1 = 0 AND
value_2 = 0 AND value_3 = 0) m
JOIN scientist_labs AS s ON (m.scientist_id = s.scientist_id)
GROUP BY lab_name
QUERY PLAN
---------------------------------------------------------------------------------------------------------------------------------------
HashAggregate (cost=28905.96..28905.99 rows=3 width=20) (actual time=297.276..297.278 rows=3 loops=1)
Group Key: s.lab_name
-> Hash Join (cost=296.00..28279.80 rows=125232 width=20) (actual time=5.606..262.124 rows=125222 loops=1)
Hash Cond: (measurements.scientist_id = s.scientist_id)
-> Seq Scan on measurements (cost=0.00..24853.00 rows=125232 width=8) (actual time=0.016..177.659 rows=125222 loops=1)
Filter: ((value_1 = 0) AND (value_2 = 0) AND (value_3 = 0))
Rows Removed by Filter: 874778
-> Hash (cost=171.00..171.00 rows=10000 width=28) (actual time=5.575..5.575 rows=10000 loops=1)
Buckets: 1024 Batches: 1 Memory Usage: 599kB
-> Seq Scan on scientist_labs s (cost=0.00..171.00 rows=10000 width=28) (actual time=0.006..2.328 rows=10000 loops=1)
Planning time: 0.603 ms
Execution time: 297.346 ms
(12 rows)
Time: 300.463 ms
CREATE TABLE measurements (tick BIGSERIAL PRIMARY KEY, record JSONB);
SELECT lab_name, COUNT(*) FROM ( SELECT
(record ->> 'scientist_id')::BIGINT AS scientist_id
FROM measurements WHERE (record ->> 'value_1')::INTEGER = 0 AND
(record ->> 'value_2')::INTEGER = 0
AND (record ->> 'value_3')::INTEGER = 0
) m JOIN scientist_labs AS s ON (m.scientist_id = s.scientist_id)
GROUP BY lab_name
QUERY PLAN
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------
HashAggregate (cost=58553.01..58553.02 rows=1 width=20) (actual time=583724.702..583724.703 rows=3 loops=1)
Group Key: s.lab_name
-> Nested Loop (cost=0.00..58553.00 rows=1 width=20) (actual time=5.457..583510.640 rows=124616 loops=1)
Join Filter: (((measurements.record ->> 'scientist_id'::text))::bigint = s.scientist_id)
Rows Removed by Join Filter: 1246035384
-> Seq Scan on measurements (cost=0.00..58182.00 rows=1 width=105) (actual time=0.032..1134.662 rows=124616 loops=1)
Filter: ((((record ->> 'value_1'::text))::integer = 0) AND (((record ->> 'value_2'::text))::integer = 0) AND (((record ->> 'value_3'::text))::integer = 0))
Rows Removed by Filter: 875384
-> Seq Scan on scientist_labs s (cost=0.00..171.00 rows=10000 width=28) (actual time=0.003..0.864 rows=10000 loops=124616)
Planning time: 0.603 ms
Execution time: 583724.765 ms
(11 rows)
Time: 583730.320 ms