-- Original query.
EXPLAIN (ANALYZE, BUFFERS)
SELECT name
FROM
(SELECT name FROM sourcepackagename
WHERE
name LIKE '%man%'
AND id IN (
SELECT sourcepackagename FROM distributionsourcepackagecache
WHERE archive IN (SELECT id FROM archive WHERE purpose IN (1, 4)) OR archive IS NULL)
ORDER BY name LIMIT 10) AS spn
UNION
(SELECT name FROM binarypackagename
WHERE
name LIKE '%man%'
AND id IN (
SELECT binarypackagename FROM distroseriespackagecache
WHERE archive IN (SELECT id FROM archive WHERE purpose IN (1, 4)))
ORDER BY name LIMIT 10)
ORDER BY name LIMIT 10;
-- Query is artifical; should be restricted by distro, and we can use
-- main_archive_ids to avoid a point of confusion for the planner.
--
-- Also indexes. Indexes everywhere.
-- CREATE INDEX temp_dspc_really ON distroseriespackagecache (archive, binarypackagename);
-- CREATE INDEX temp_dspc_really_the_other_one ON distributionsourcepackagecache (archive, sourcepackagename);
EXPLAIN (ANALYZE, BUFFERS)
SELECT name
FROM
(SELECT name FROM sourcepackagename
WHERE
name LIKE '%man%'
AND id IN (
SELECT sourcepackagename FROM distributionsourcepackagecache
WHERE archive IN (1, 534) OR archive IS NULL)
ORDER BY name LIMIT 10) AS spn
UNION
(SELECT name FROM binarypackagename
WHERE
name LIKE '%man%'
AND id IN (
SELECT binarypackagename FROM distroseriespackagecache
WHERE archive IN (1, 534))
ORDER BY name LIMIT 10)
ORDER BY name LIMIT 10;
-- ~100ms
-- The source query still picks a hash join rather than a nested loop.
-- But we have the name data in the tables themselves.
-- CREATE EXTENSION btree_gin;
-- CREATE INDEX temp_dspc_hm_trgm_maybe ON distributionsourcepackagecache USING gin (archive, name trgm.gin_trgm_ops);
EXPLAIN (ANALYZE, BUFFERS)
SELECT name
FROM
(SELECT name FROM distributionsourcepackagecache
WHERE
name LIKE '%linux%'
AND archive IN (1, 534) OR archive IS NULL
ORDER BY name LIMIT 10) AS spn
UNION
(SELECT name FROM binarypackagename
WHERE
name LIKE '%linux%'
AND id IN (
SELECT binarypackagename FROM distroseriespackagecache
WHERE archive IN (1, 534))
ORDER BY name LIMIT 10)
ORDER BY name LIMIT 10;
-- <10ms
-- But why was it going the wrong way for sources? Let's backtrack from the GIN approach a bit.
-- It really wants to hash or merge. What if we invert the index?
-- CREATE INDEX temp_dspc_really_the_other_one_2 ON distributionsourcepackagecache (sourcepackagename, archive);
EXPLAIN (ANALYZE, BUFFERS)
SELECT name
FROM
(SELECT name FROM sourcepackagename
WHERE
name LIKE '%linux%'
AND id IN (
SELECT sourcepackagename FROM distributionsourcepackagecache
WHERE archive IN (1, 534) OR archive IS NULL)
ORDER BY name LIMIT 10) AS spn
UNION
(SELECT name FROM binarypackagename
WHERE
name LIKE '%linux%'
AND id IN (
SELECT binarypackagename FROM distroseriespackagecache
WHERE archive IN (1, 534))
ORDER BY name LIMIT 10)
ORDER BY name LIMIT 10;
-- ~5-50ms depending on name. And not too ugly.