Ubuntu Pastebin

Paste from wgrant at Fri, 3 Jun 2016 00:12:07 +0000

Download as text
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
-- Original query.
EXPLAIN (ANALYZE, BUFFERS)
SELECT name
FROM
    (SELECT name FROM sourcepackagename
     WHERE
        name LIKE '%man%'
        AND id IN (
            SELECT sourcepackagename FROM distributionsourcepackagecache
            WHERE archive IN (SELECT id FROM archive WHERE purpose IN (1, 4)) OR archive IS NULL)
     ORDER BY name LIMIT 10) AS spn
    UNION
    (SELECT name FROM binarypackagename
     WHERE
        name LIKE '%man%'
        AND id IN (
            SELECT binarypackagename FROM distroseriespackagecache
            WHERE archive IN (SELECT id FROM archive WHERE purpose IN (1, 4)))
        ORDER BY name LIMIT 10)
ORDER BY name LIMIT 10;


-- Query is artifical; should be restricted by distro, and we can use
-- main_archive_ids to avoid a point of confusion for the planner.
--
-- Also indexes. Indexes everywhere.
-- CREATE INDEX temp_dspc_really ON distroseriespackagecache (archive, binarypackagename);
-- CREATE INDEX temp_dspc_really_the_other_one ON distributionsourcepackagecache (archive, sourcepackagename);
EXPLAIN (ANALYZE, BUFFERS)
SELECT name
FROM
    (SELECT name FROM sourcepackagename
     WHERE
        name LIKE '%man%'
        AND id IN (
            SELECT sourcepackagename FROM distributionsourcepackagecache
            WHERE archive IN (1, 534) OR archive IS NULL)
     ORDER BY name LIMIT 10) AS spn
    UNION
    (SELECT name FROM binarypackagename
     WHERE
        name LIKE '%man%'
        AND id IN (
            SELECT binarypackagename FROM distroseriespackagecache
            WHERE archive IN (1, 534))
        ORDER BY name LIMIT 10)
ORDER BY name LIMIT 10;
-- ~100ms


-- The source query still picks a hash join rather than a nested loop.
-- But we have the name data in the tables themselves.
-- CREATE EXTENSION btree_gin;
-- CREATE INDEX temp_dspc_hm_trgm_maybe ON distributionsourcepackagecache USING gin (archive, name trgm.gin_trgm_ops);
EXPLAIN (ANALYZE, BUFFERS)
SELECT name
FROM
    (SELECT name FROM distributionsourcepackagecache
     WHERE
        name LIKE '%linux%'
        AND archive IN (1, 534) OR archive IS NULL
     ORDER BY name LIMIT 10) AS spn
    UNION
    (SELECT name FROM binarypackagename
     WHERE
        name LIKE '%linux%'
        AND id IN (
            SELECT binarypackagename FROM distroseriespackagecache
            WHERE archive IN (1, 534))
        ORDER BY name LIMIT 10)
ORDER BY name LIMIT 10;
-- <10ms

-- But why was it going the wrong way for sources? Let's backtrack from the GIN approach a bit.
-- It really wants to hash or merge. What if we invert the index?
-- CREATE INDEX temp_dspc_really_the_other_one_2 ON distributionsourcepackagecache (sourcepackagename, archive);
EXPLAIN (ANALYZE, BUFFERS)
SELECT name
FROM
    (SELECT name FROM sourcepackagename
     WHERE
        name LIKE '%linux%'
        AND id IN (
            SELECT sourcepackagename FROM distributionsourcepackagecache
            WHERE archive IN (1, 534) OR archive IS NULL)
     ORDER BY name LIMIT 10) AS spn
    UNION
    (SELECT name FROM binarypackagename
     WHERE
        name LIKE '%linux%'
        AND id IN (
            SELECT binarypackagename FROM distroseriespackagecache
            WHERE archive IN (1, 534))
        ORDER BY name LIMIT 10)
ORDER BY name LIMIT 10;
-- ~5-50ms depending on name. And not too ugly.
Download as text