This is a continuation of work like
11c34b342bd7, done to reduce the
bloat of pg_stat_statements by applying more normalization to query
entries. This commit is able to detect and normalize values in
VariableSetStmt, resulting in:
SET conf_param = $1
Compared to other parse nodes, VariableSetStmt is embedded in many more
places in the parser, impacting many query patterns in
pg_stat_statements. A custom jumble function is used, with an extra
field in the node to decide if arguments should be included in the
jumbling or not, a location field not being enough for this purpose.
This approach allows for finer tuning.
Clauses relying on one or more keywords are not normalized, for example:
* DEFAULT
* FROM CURRENT
* List of keywords. SET SESSION CHARACTERISTICS AS TRANSACTION,
where it is critical to differentiate different sets of options, is a
good example of why normalization should not happen.
Some queries use VariableSetStmt for subclauses with SET, and these also
have their values normalized:
- ALTER DATABASE
- ALTER ROLE
- ALTER SYSTEM
- CREATE/ALTER FUNCTION
ba90eac7a995 has added test coverage for most of the existing SET
patterns. The expected output of these tests shows the difference this
commit creates. Normalization could perhaps be applied to more portions
of the grammar, but what is done here is conservative, and good enough as
a starting point.
Author: Greg Sabino Mullane, Michael Paquier
Discussion: https://postgr.es/m/36e5bffe-e989-194f-85c8-06e7bc88e6f7@amazon.com
Discussion: https://postgr.es/m/B44FA29D-EBD0-4DD9-ABC2-16F1CB087074@amazon.com
Discussion: https://postgr.es/m/CAKAnmmJtJY2jzQN91=2QAD2eAJAA-Per61eyO48-TyxEg-q0Rg@mail.gmail.com
2 | 4 | SELECT * FROM pgss_dml_tab WHERE a > $1 ORDER BY a
1 | 8 | SELECT * FROM pgss_dml_tab WHERE a IN ($1, $2, $3, $4, $5)
1 | 1 | SELECT pg_stat_statements_reset() IS NOT NULL AS t
- 1 | 0 | SET pg_stat_statements.track_utility = FALSE
+ 1 | 0 | SET pg_stat_statements.track_utility = $1
6 | 6 | UPDATE pgss_dml_tab SET b = $1 WHERE a = $2
1 | 3 | UPDATE pgss_dml_tab SET b = $1 WHERE a > $2
(10 rows)
| | END; $$
f | 1 | SELECT $1::TEXT
t | 1 | SELECT pg_stat_statements_reset() IS NOT NULL AS t
- t | 1 | SET pg_stat_statements.track = 'all'
+ t | 1 | SET pg_stat_statements.track = $1
(7 rows)
-- Procedure with multiple utility statements.
1 | 0 | CREATE FOREIGN TABLE foreign_stats (a int) SERVER server_stats
1 | 0 | CREATE FUNCTION func_stats(a text DEFAULT 'a_data', b text DEFAULT lower('b_data'))+
| | RETURNS text AS $$ SELECT $1::text || '_' || $2::text; $$ LANGUAGE SQL +
- | | SET work_mem = '256kB'
+ | | SET work_mem = $1
1 | 0 | CREATE FUNCTION trigger_func_stats () RETURNS trigger LANGUAGE plpgsql +
| | AS $$ BEGIN return OLD; end; $$
1 | 0 | CREATE INDEX pt_stats2_index ON ONLY pt_stats2 (a)
1 | 0 | SET LOCAL SESSION AUTHORIZATION 'regress_stat_set_1'
1 | 0 | SET LOCAL SESSION AUTHORIZATION 'regress_stat_set_2'
1 | 0 | SET LOCAL SESSION AUTHORIZATION DEFAULT
- 1 | 0 | SET LOCAL work_mem = '128kB'
- 1 | 0 | SET LOCAL work_mem = '256kB'
+ 2 | 0 | SET LOCAL work_mem = $1
2 | 0 | SET LOCAL work_mem = DEFAULT
1 | 0 | SET LOCAL work_mem FROM CURRENT
1 | 0 | SET SESSION AUTHORIZATION 'regress_stat_set_1'
1 | 0 | SET SESSION CHARACTERISTICS AS TRANSACTION READ ONLY, READ ONLY
1 | 0 | SET SESSION CHARACTERISTICS AS TRANSACTION READ ONLY, READ WRITE
1 | 0 | SET SESSION SESSION AUTHORIZATION DEFAULT
- 1 | 0 | SET SESSION work_mem = '300kB'
- 1 | 0 | SET SESSION work_mem = '400kB'
1 | 0 | SET TIME ZONE 'America/New_York'
1 | 0 | SET TIME ZONE 'Asia/Tokyo'
1 | 0 | SET TIME ZONE 'CST7CDT,M4.1.0,M10.5.0'
1 | 0 | SET TRANSACTION ISOLATION LEVEL SERIALIZABLE
1 | 0 | SET XML OPTION CONTENT
1 | 0 | SET XML OPTION DOCUMENT
- 1 | 0 | SET enable_seqscan = off
- 1 | 0 | SET enable_seqscan = on
- 2 | 0 | SET work_mem = '1MB'
- 1 | 0 | SET work_mem = '2MB'
+ 2 | 0 | SET enable_seqscan = $1
+ 5 | 0 | SET work_mem = $1
2 | 0 | SET work_mem = DEFAULT
1 | 0 | SET work_mem FROM CURRENT
-(39 rows)
+(34 rows)
DROP ROLE regress_stat_set_1;
DROP ROLE regress_stat_set_2;
-------+------+----------------------------------------------------
1 | 0 | RESET ALL
1 | 1 | SELECT pg_stat_statements_reset() IS NOT NULL AS t
- 1 | 0 | SET SCHEMA 'foo'
- 1 | 0 | SET SCHEMA 'public'
-(4 rows)
+ 2 | 0 | SET SCHEMA $1
+(3 rows)
SELECT pg_stat_statements_reset() IS NOT NULL AS t;
t
DELETE FROM pgss_wal_tab WHERE a > $1 | 1 | 1 | t | t | t
INSERT INTO pgss_wal_tab VALUES(generate_series($1, $2), $3) | 1 | 10 | t | t | t
SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 | 1 | f | f | f
- SET pg_stat_statements.track_utility = FALSE | 1 | 0 | f | f | t
+ SET pg_stat_statements.track_utility = $1 | 1 | 0 | f | f | t
UPDATE pgss_wal_tab SET b = $1 WHERE a > $2 | 1 | 3 | t | t | t
(5 rows)
static void _jumbleNode(JumbleState *jstate, Node *node);
static void _jumbleA_Const(JumbleState *jstate, Node *node);
static void _jumbleList(JumbleState *jstate, Node *node);
+static void _jumbleVariableSetStmt(JumbleState *jstate, Node *node);
/*
* Given a possibly multi-statement source string, confine our attention to the
}
}
}
+
+static void
+_jumbleVariableSetStmt(JumbleState *jstate, Node *node)
+{
+ VariableSetStmt *expr = (VariableSetStmt *) node;
+
+ JUMBLE_FIELD(kind);
+ JUMBLE_STRING(name);
+
+ /*
+ * Account for the list of arguments in query jumbling only if told by the
+ * parser, through the jumble_args flag of the node.
+ *
+ * The grammar sets this flag for SET flavors whose values are built from
+ * keywords (for example SET TRANSACTION or SET XML OPTION), so that
+ * different sets of options are kept as distinct entries. When the flag
+ * is not set, the arguments are left out of the jumble and statements
+ * that differ only by their value share one entry, reported in a form
+ * like "SET work_mem = $1".
+ */
+ if (expr->jumble_args)
+ JUMBLE_NODE(args);
+ JUMBLE_FIELD(is_local);
+ JUMBLE_LOCATION(location);
+}
n->kind = VAR_SET_MULTI;
n->name = "TRANSACTION";
n->args = $2;
+ n->jumble_args = true;
+ n->location = -1;
$$ = n;
}
| SESSION CHARACTERISTICS AS TRANSACTION transaction_mode_list
n->kind = VAR_SET_MULTI;
n->name = "SESSION CHARACTERISTICS";
n->args = $5;
+ n->jumble_args = true;
+ n->location = -1;
$$ = n;
}
| set_rest_more
n->kind = VAR_SET_VALUE;
n->name = $1;
n->args = $3;
+ n->location = @3;
$$ = n;
}
| var_name '=' var_list
n->kind = VAR_SET_VALUE;
n->name = $1;
n->args = $3;
+ n->location = @3;
$$ = n;
}
| var_name TO DEFAULT
n->kind = VAR_SET_DEFAULT;
n->name = $1;
+ n->location = -1;
$$ = n;
}
| var_name '=' DEFAULT
n->kind = VAR_SET_DEFAULT;
n->name = $1;
+ n->location = -1;
$$ = n;
}
;
n->kind = VAR_SET_CURRENT;
n->name = $1;
+ n->location = -1;
$$ = n;
}
/* Special syntaxes mandated by SQL standard: */
n->kind = VAR_SET_VALUE;
n->name = "timezone";
+ n->location = -1;
+ n->jumble_args = true;
if ($3 != NULL)
n->args = list_make1($3);
else
n->kind = VAR_SET_VALUE;
n->name = "search_path";
n->args = list_make1(makeStringConst($2, @2));
+ n->location = @2;
$$ = n;
}
| NAMES opt_encoding
n->kind = VAR_SET_VALUE;
n->name = "client_encoding";
+ n->location = @2;
if ($2 != NULL)
n->args = list_make1(makeStringConst($2, @2));
else
n->kind = VAR_SET_VALUE;
n->name = "role";
n->args = list_make1(makeStringConst($2, @2));
+ n->location = @2;
$$ = n;
}
| SESSION AUTHORIZATION NonReservedWord_or_Sconst
n->kind = VAR_SET_VALUE;
n->name = "session_authorization";
n->args = list_make1(makeStringConst($3, @3));
+ n->location = @3;
$$ = n;
}
| SESSION AUTHORIZATION DEFAULT
n->kind = VAR_SET_DEFAULT;
n->name = "session_authorization";
+ n->location = -1;
$$ = n;
}
| XML_P OPTION document_or_content
n->kind = VAR_SET_VALUE;
n->name = "xmloption";
n->args = list_make1(makeStringConst($3 == XMLOPTION_DOCUMENT ? "DOCUMENT" : "CONTENT", @3));
+ n->jumble_args = true;
+ n->location = -1;
$$ = n;
}
/* Special syntaxes invented by PostgreSQL: */
n->kind = VAR_SET_MULTI;
n->name = "TRANSACTION SNAPSHOT";
n->args = list_make1(makeStringConst($3, @3));
+ n->location = @3;
$$ = n;
}
;
n->kind = VAR_RESET;
n->name = "timezone";
+ n->location = -1;
$$ = n;
}
| TRANSACTION ISOLATION LEVEL
n->kind = VAR_RESET;
n->name = "transaction_isolation";
+ n->location = -1;
$$ = n;
}
| SESSION AUTHORIZATION
n->kind = VAR_RESET;
n->name = "session_authorization";
+ n->location = -1;
$$ = n;
}
;
n->kind = VAR_RESET;
n->name = $1;
+ n->location = -1;
$$ = n;
}
| ALL
VariableSetStmt *n = makeNode(VariableSetStmt);
n->kind = VAR_RESET_ALL;
+ n->location = -1;
$$ = n;
}
;
typedef struct VariableSetStmt
{
+ pg_node_attr(custom_query_jumble)
+
NodeTag type;
VariableSetKind kind;
- char *name; /* variable to be set */
- List *args; /* List of A_Const nodes */
- bool is_local; /* SET LOCAL? */
+ /* variable to be set */
+ char *name;
+ /* List of A_Const nodes */
+ List *args;
+
+ /*
+ * True if arguments should be accounted for in query jumbling. We use a
+ * separate flag rather than query_jumble_ignore on "args" as several
+ * grammar flavors of SET rely on a list of values that are parsed
+ * directly from the grammar's keywords.
+ */
+ bool jumble_args;
+ /* SET LOCAL? */
+ bool is_local;
+ /*
+ * token location, or -1 if unknown. The grammar assigns either the
+ * position of the value token of the statement or -1; the custom jumble
+ * function records it with JUMBLE_LOCATION.
+ */
+ ParseLoc location pg_node_attr(query_jumble_location);
} VariableSetStmt;
/* ----------------------