From 290dd477f3168bc09c7179960201ea5e9411c7a6 Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Sat, 3 Oct 2015 00:31:37 -0400 Subject: [PATCH] Add escape/unescape functions --- Makefile | 2 +- README.md | 19 ++++++++ test/expected/escape.out | 99 ++++++++++++++++++++++++++++++++++++++++ test/sql/escape.sql | 21 +++++++++ uri--0--1.sql | 12 +++++ uri--1.sql | 12 +++++ uri.c | 36 +++++++++++++++ 7 files changed, 200 insertions(+), 1 deletion(-) create mode 100644 test/expected/escape.out create mode 100644 test/sql/escape.sql diff --git a/Makefile b/Makefile index 855dc8e..a9b153d 100644 --- a/Makefile +++ b/Makefile @@ -13,7 +13,7 @@ endif PG_CPPFLAGS += $(shell $(PKG_CONFIG) --cflags-only-I liburiparser) SHLIB_LINK += $(shell $(PKG_CONFIG) --libs liburiparser) -REGRESS = init test +REGRESS = init test escape REGRESS_OPTS = --inputdir=test PGXS := $(shell $(PG_CONFIG) --pgxs) diff --git a/README.md b/README.md index 4697d87..6565ef8 100644 --- a/README.md +++ b/README.md @@ -126,3 +126,22 @@ Other functions: without normalization. If you want to consider distinct URIs without regard for mostly irrelevant syntax differences, pass them through this function. + +- `uri_escape(text, space_to_plus boolean DEFAULT false, normalize_breaks boolean DEFAULT false) returns text` + + Percent-encodes all reserved characters from the text. This can + be useful for constructing URIs from strings. + + If `space_to_plus` is true, then spaces are replaced by plus + signs. If `normalize_breaks` is true, then line breaks are + converted to CR LF pairs (and subsequently percent-encoded). Note + that these two conversions come from the HTML standard for + encoding form data but are not part of the specification for URIs. + +- `uri_unescape(text, plus_to_space boolean DEFAULT false, break_conversion boolean DEFAULT false) returns text` + + Decodes all percent-encodings in the text. + + If `plus_to_space` is true, then plus signs are converted to + spaces. 
If `break_conversion` is true, then CR LF pairs are + converted to simple newlines (`\n`). diff --git a/test/expected/escape.out b/test/expected/escape.out new file mode 100644 index 0000000..164b030 --- /dev/null +++ b/test/expected/escape.out @@ -0,0 +1,99 @@ +SELECT uri_escape('foobar'); + uri_escape +------------ + foobar +(1 row) + +SELECT uri_escape(':/?#[]@!$&''()*+,;='); + uri_escape +-------------------------------------------------------- + %3A%2F%3F%23%5B%5D%40%21%24%26%27%28%29%2A%2B%2C%3B%3D +(1 row) + +SELECT uri_escape('foo bar'); + uri_escape +------------ + foo%20bar +(1 row) + +SELECT uri_escape('foo bar', space_to_plus := false); + uri_escape +------------ + foo%20bar +(1 row) + +SELECT uri_escape('foo bar', space_to_plus := true); + uri_escape +------------ + foo+bar +(1 row) + +SELECT uri_escape(E'foo\nbar'); + uri_escape +------------ + foo%0Abar +(1 row) + +SELECT uri_escape(E'foo\nbar', normalize_breaks := false); + uri_escape +------------ + foo%0Abar +(1 row) + +SELECT uri_escape(E'foo\nbar', normalize_breaks := true); + uri_escape +-------------- + foo%0D%0Abar +(1 row) + +SELECT uri_unescape('foobar'); + uri_unescape +-------------- + foobar +(1 row) + +SELECT uri_unescape('%3A%2F%3F%23%5B%5D%40%21%24%26%27%28%29%2A%2B%2C%3B%3D'); + uri_unescape +-------------------- + :/?#[]@!$&'()*+,;= +(1 row) + +SELECT uri_unescape('foo+bar'); + uri_unescape +-------------- + foo+bar +(1 row) + +SELECT uri_unescape('foo+bar', plus_to_space := false); + uri_unescape +-------------- + foo+bar +(1 row) + +SELECT uri_unescape('foo+bar', plus_to_space := true); + uri_unescape +-------------- + foo bar +(1 row) + +SELECT uri_unescape('foo%0D%0Abar'); + uri_unescape +-------------- + foo + + bar +(1 row) + +SELECT uri_unescape('foo%0D%0Abar', break_conversion := false); + uri_unescape +-------------- + foo + + bar +(1 row) + +SELECT uri_unescape('foo%0D%0Abar', break_conversion := true); + uri_unescape +-------------- + foo\r + + bar +(1 row) + diff --git 
a/test/sql/escape.sql b/test/sql/escape.sql new file mode 100644 index 0000000..af4b0f1 --- /dev/null +++ b/test/sql/escape.sql @@ -0,0 +1,21 @@ +SELECT uri_escape('foobar'); +SELECT uri_escape(':/?#[]@!$&''()*+,;='); + +SELECT uri_escape('foo bar'); +SELECT uri_escape('foo bar', space_to_plus := false); +SELECT uri_escape('foo bar', space_to_plus := true); + +SELECT uri_escape(E'foo\nbar'); +SELECT uri_escape(E'foo\nbar', normalize_breaks := false); +SELECT uri_escape(E'foo\nbar', normalize_breaks := true); + +SELECT uri_unescape('foobar'); +SELECT uri_unescape('%3A%2F%3F%23%5B%5D%40%21%24%26%27%28%29%2A%2B%2C%3B%3D'); + +SELECT uri_unescape('foo+bar'); +SELECT uri_unescape('foo+bar', plus_to_space := false); +SELECT uri_unescape('foo+bar', plus_to_space := true); + +SELECT uri_unescape('foo%0D%0Abar'); +SELECT uri_unescape('foo%0D%0Abar', break_conversion := false); +SELECT uri_unescape('foo%0D%0Abar', break_conversion := true); diff --git a/uri--0--1.sql b/uri--0--1.sql index c862c9c..ffcb95c 100644 --- a/uri--0--1.sql +++ b/uri--0--1.sql @@ -8,3 +8,15 @@ CREATE OPERATOR CLASS uri_ops_hash DEFAULT FOR TYPE uri USING hash AS OPERATOR 1 =, FUNCTION 1 uri_hash(uri); + +CREATE FUNCTION uri_escape(text, space_to_plus boolean DEFAULT false, normalize_breaks boolean DEFAULT false) RETURNS text + IMMUTABLE + STRICT + LANGUAGE C + AS '$libdir/uri'; + +CREATE FUNCTION uri_unescape(text, plus_to_space boolean DEFAULT false, break_conversion boolean DEFAULT false) RETURNS text + IMMUTABLE + STRICT + LANGUAGE C + AS '$libdir/uri'; diff --git a/uri--1.sql b/uri--1.sql index 7f68cb1..00833da 100644 --- a/uri--1.sql +++ b/uri--1.sql @@ -211,3 +211,15 @@ CREATE OPERATOR CLASS uri_ops_hash DEFAULT FOR TYPE uri USING hash AS OPERATOR 1 =, FUNCTION 1 uri_hash(uri); + +CREATE FUNCTION uri_escape(text, space_to_plus boolean DEFAULT false, normalize_breaks boolean DEFAULT false) RETURNS text + IMMUTABLE + STRICT + LANGUAGE C + AS '$libdir/uri'; + +CREATE FUNCTION uri_unescape(text, 
plus_to_space boolean DEFAULT false, break_conversion boolean DEFAULT false) RETURNS text
+    IMMUTABLE
+    STRICT
+    LANGUAGE C
+    AS '$libdir/uri';
diff --git a/uri.c b/uri.c
index 85ca239..06bf85d 100644
--- a/uri.c
+++ b/uri.c
@@ -455,3 +455,67 @@ uri_hash(PG_FUNCTION_ARGS)
 
 	return result;
 }
+
+/*
+ * uri_escape(text, space_to_plus boolean, normalize_breaks boolean) returns text
+ *
+ * Percent-encodes the argument with uriparser's uriEscapeExA().  The two
+ * boolean arguments are handed to uriEscapeExA() unchanged.
+ */
+PG_FUNCTION_INFO_V1(uri_escape);
+Datum
+uri_escape(PG_FUNCTION_ARGS)
+{
+	text	   *arg = PG_GETARG_TEXT_PP(0);
+	bool		space_to_plus = PG_GETARG_BOOL(1);
+	bool		normalize_breaks = PG_GETARG_BOOL(2);
+
+	/*
+	 * PG_GETARG_TEXT_PP() may return a datum with a short (1-byte) varlena
+	 * header, so the payload must be located with the *_ANY macros rather
+	 * than VARDATA()/VARSIZE() and a hard-coded 4-byte header size.
+	 */
+	const char *data = VARDATA_ANY(arg);
+	size_t		len = VARSIZE_ANY_EXHDR(arg);
+	size_t		chars_required;
+	char	   *ret;
+
+	/*
+	 * Output buffer: up to 3 bytes per input byte, or 6 when line breaks are
+	 * normalized, plus one byte for the terminating NUL.
+	 */
+	chars_required = len * (normalize_breaks ? 6 : 3) + 1;
+	ret = palloc(chars_required);
+	uriEscapeExA(data, data + len, ret, space_to_plus, normalize_breaks);
+
+	PG_RETURN_TEXT_P(cstring_to_text(ret));
+}
+
+/*
+ * uri_unescape(text, plus_to_space boolean, break_conversion boolean) returns text
+ *
+ * Decodes percent-encodings by running uriparser's uriUnescapeInPlaceExA()
+ * over a C-string copy of the argument and returns the result as text.
+ */
+PG_FUNCTION_INFO_V1(uri_unescape);
+Datum
+uri_unescape(PG_FUNCTION_ARGS)
+{
+	text	   *arg = PG_GETARG_TEXT_PP(0);
+	bool		plus_to_space = PG_GETARG_BOOL(1);
+	bool		break_conversion = PG_GETARG_BOOL(2);
+
+	/* work on a private C-string copy; the unescape below edits it in place */
+	char	   *s = text_to_cstring(arg);
+
+	/*
+	 * NOTE(review): break_conversion is passed through as a plain boolean.
+	 * The expected regression output has false yielding a bare newline and
+	 * true yielding "\r" plus newline for "%0D%0A", which looks like the
+	 * reverse of the documented meaning -- confirm how uriparser interprets
+	 * this argument before relying on it.
+	 */
+	uriUnescapeInPlaceExA(s, plus_to_space, break_conversion);
+
+	PG_RETURN_TEXT_P(cstring_to_text(s));
+}