Add uri_normalize function

This commit is contained in:
Peter Eisentraut 2015-04-12 16:01:03 -04:00
parent 527cc23bc4
commit 8a02974787
5 changed files with 123 additions and 2 deletions

View File

@ -110,3 +110,19 @@ A number of functions are provided to extract parts of a URI:
Extracts the fragment part of a URI (roughly speaking, everything Extracts the fragment part of a URI (roughly speaking, everything
after the `#`). If there is no fragment part, returns null. after the `#`). If there is no fragment part, returns null.
Other functions:
- `uri_normalize(uri) returns uri`
Performs syntax-based normalization of the URI. This includes
case normalization, percent-encoding normalization, and removing
redundant `.` and `..` path segments. See
[RFC 3986 section 6.2.2](http://tools.ietf.org/html/rfc3986#section-6.2.2)
for the full details.
Note that this module (and similar modules in other programming
languages) compares URIs for equality in their original form,
without normalization. If you want to compare URIs without regard
for mostly irrelevant syntax differences, pass them through this
function first and compare the normalized results.

View File

@ -16,6 +16,11 @@ VALUES ('http://www.postgresql.org/'),
('/'), ('/'),
('foobar'), ('foobar'),
('/foobar'); ('/foobar');
-- normalization test values from <https://tools.ietf.org/html/rfc3986#section-6.2.2>
INSERT INTO test (b)
VALUES ('HTTP://www.EXAMPLE.com/'),
('http://www.ex%41mple.com/'),
('eXAMPLE://a/./b/../b/%63/%7bfoo%7d');
SELECT * FROM test; SELECT * FROM test;
a | b a | b
----+----------------------------------------------------------------------------------------- ----+-----------------------------------------------------------------------------------------
@ -33,7 +38,10 @@ SELECT * FROM test;
12 | / 12 | /
13 | foobar 13 | foobar
14 | /foobar 14 | /foobar
(14 rows) 15 | HTTP://www.EXAMPLE.com/
16 | http://www.ex%41mple.com/
17 | eXAMPLE://a/./b/../b/%63/%7bfoo%7d
(17 rows)
-- error cases -- error cases
SELECT uri 'http://host:port/'; SELECT uri 'http://host:port/';
@ -42,6 +50,7 @@ LINE 1: SELECT uri 'http://host:port/';
^ ^
\x on \x on
SELECT b AS uri, SELECT b AS uri,
uri_normalize(b),
uri_scheme(b), uri_scheme(b),
uri_userinfo(b), uri_userinfo(b),
uri_host(b), uri_host(b),
@ -54,6 +63,7 @@ SELECT b AS uri,
FROM test; FROM test;
-[ RECORD 1 ]--+---------------------------------------------------------------------------------------- -[ RECORD 1 ]--+----------------------------------------------------------------------------------------
uri | http://www.postgresql.org/ uri | http://www.postgresql.org/
uri_normalize | http://www.postgresql.org/
uri_scheme | http uri_scheme | http
uri_userinfo | _null_ uri_userinfo | _null_
uri_host | www.postgresql.org uri_host | www.postgresql.org
@ -65,6 +75,7 @@ uri_query | _null_
uri_fragment | _null_ uri_fragment | _null_
-[ RECORD 2 ]--+---------------------------------------------------------------------------------------- -[ RECORD 2 ]--+----------------------------------------------------------------------------------------
uri | http://www.postgresql.org/docs/devel/static/xfunc-sql.html#XFUNC-SQL-FUNCTION-ARGUMENTS uri | http://www.postgresql.org/docs/devel/static/xfunc-sql.html#XFUNC-SQL-FUNCTION-ARGUMENTS
uri_normalize | http://www.postgresql.org/docs/devel/static/xfunc-sql.html#XFUNC-SQL-FUNCTION-ARGUMENTS
uri_scheme | http uri_scheme | http
uri_userinfo | _null_ uri_userinfo | _null_
uri_host | www.postgresql.org uri_host | www.postgresql.org
@ -76,6 +87,7 @@ uri_query | _null_
uri_fragment | XFUNC-SQL-FUNCTION-ARGUMENTS uri_fragment | XFUNC-SQL-FUNCTION-ARGUMENTS
-[ RECORD 3 ]--+---------------------------------------------------------------------------------------- -[ RECORD 3 ]--+----------------------------------------------------------------------------------------
uri | https://duckduckgo.com/?q=postgresql&ia=about uri | https://duckduckgo.com/?q=postgresql&ia=about
uri_normalize | https://duckduckgo.com/?q=postgresql&ia=about
uri_scheme | https uri_scheme | https
uri_userinfo | _null_ uri_userinfo | _null_
uri_host | duckduckgo.com uri_host | duckduckgo.com
@ -87,6 +99,7 @@ uri_query | q=postgresql&ia=about
uri_fragment | _null_ uri_fragment | _null_
-[ RECORD 4 ]--+---------------------------------------------------------------------------------------- -[ RECORD 4 ]--+----------------------------------------------------------------------------------------
uri | ftp://ftp.gnu.org/gnu/bison uri | ftp://ftp.gnu.org/gnu/bison
uri_normalize | ftp://ftp.gnu.org/gnu/bison
uri_scheme | ftp uri_scheme | ftp
uri_userinfo | _null_ uri_userinfo | _null_
uri_host | ftp.gnu.org uri_host | ftp.gnu.org
@ -98,6 +111,7 @@ uri_query | _null_
uri_fragment | _null_ uri_fragment | _null_
-[ RECORD 5 ]--+---------------------------------------------------------------------------------------- -[ RECORD 5 ]--+----------------------------------------------------------------------------------------
uri | mailto:foo@example.com uri | mailto:foo@example.com
uri_normalize | mailto:foo@example.com
uri_scheme | mailto uri_scheme | mailto
uri_userinfo | _null_ uri_userinfo | _null_
uri_host | _null_ uri_host | _null_
@ -109,6 +123,7 @@ uri_query | _null_
uri_fragment | _null_ uri_fragment | _null_
-[ RECORD 6 ]--+---------------------------------------------------------------------------------------- -[ RECORD 6 ]--+----------------------------------------------------------------------------------------
uri | ssh://username@review.openstack.org:29418/openstack/nova.git uri | ssh://username@review.openstack.org:29418/openstack/nova.git
uri_normalize | ssh://username@review.openstack.org:29418/openstack/nova.git
uri_scheme | ssh uri_scheme | ssh
uri_userinfo | username uri_userinfo | username
uri_host | review.openstack.org uri_host | review.openstack.org
@ -120,6 +135,7 @@ uri_query | _null_
uri_fragment | _null_ uri_fragment | _null_
-[ RECORD 7 ]--+---------------------------------------------------------------------------------------- -[ RECORD 7 ]--+----------------------------------------------------------------------------------------
uri | http://admin:password@192.168.0.1 uri | http://admin:password@192.168.0.1
uri_normalize | http://admin:password@192.168.0.1
uri_scheme | http uri_scheme | http
uri_userinfo | admin:password uri_userinfo | admin:password
uri_host | 192.168.0.1 uri_host | 192.168.0.1
@ -131,6 +147,7 @@ uri_query | _null_
uri_fragment | _null_ uri_fragment | _null_
-[ RECORD 8 ]--+---------------------------------------------------------------------------------------- -[ RECORD 8 ]--+----------------------------------------------------------------------------------------
uri | http://[FEDC:BA98:7654:3210:FEDC:BA98:7654:3210]:80/index.html uri | http://[FEDC:BA98:7654:3210:FEDC:BA98:7654:3210]:80/index.html
uri_normalize | http://[fedc:ba98:7654:3210:fedc:ba98:7654:3210]:80/index.html
uri_scheme | http uri_scheme | http
uri_userinfo | _null_ uri_userinfo | _null_
uri_host | FEDC:BA98:7654:3210:FEDC:BA98:7654:3210 uri_host | FEDC:BA98:7654:3210:FEDC:BA98:7654:3210
@ -142,6 +159,7 @@ uri_query | _null_
uri_fragment | _null_ uri_fragment | _null_
-[ RECORD 9 ]--+---------------------------------------------------------------------------------------- -[ RECORD 9 ]--+----------------------------------------------------------------------------------------
uri | http://[1080::8:800:200C:417A]/foo uri | http://[1080::8:800:200C:417A]/foo
uri_normalize | http://[1080:0000:0000:0000:0008:0800:200c:417a]/foo
uri_scheme | http uri_scheme | http
uri_userinfo | _null_ uri_userinfo | _null_
uri_host | 1080::8:800:200C:417A uri_host | 1080::8:800:200C:417A
@ -153,6 +171,7 @@ uri_query | _null_
uri_fragment | _null_ uri_fragment | _null_
-[ RECORD 10 ]-+---------------------------------------------------------------------------------------- -[ RECORD 10 ]-+----------------------------------------------------------------------------------------
uri | http://host: uri | http://host:
uri_normalize | http://host:
uri_scheme | http uri_scheme | http
uri_userinfo | _null_ uri_userinfo | _null_
uri_host | host uri_host | host
@ -164,6 +183,7 @@ uri_query | _null_
uri_fragment | _null_ uri_fragment | _null_
-[ RECORD 11 ]-+---------------------------------------------------------------------------------------- -[ RECORD 11 ]-+----------------------------------------------------------------------------------------
uri | uri |
uri_normalize |
uri_scheme | _null_ uri_scheme | _null_
uri_userinfo | _null_ uri_userinfo | _null_
uri_host | _null_ uri_host | _null_
@ -175,6 +195,7 @@ uri_query | _null_
uri_fragment | _null_ uri_fragment | _null_
-[ RECORD 12 ]-+---------------------------------------------------------------------------------------- -[ RECORD 12 ]-+----------------------------------------------------------------------------------------
uri | / uri | /
uri_normalize | /
uri_scheme | _null_ uri_scheme | _null_
uri_userinfo | _null_ uri_userinfo | _null_
uri_host | _null_ uri_host | _null_
@ -186,6 +207,7 @@ uri_query | _null_
uri_fragment | _null_ uri_fragment | _null_
-[ RECORD 13 ]-+---------------------------------------------------------------------------------------- -[ RECORD 13 ]-+----------------------------------------------------------------------------------------
uri | foobar uri | foobar
uri_normalize | foobar
uri_scheme | _null_ uri_scheme | _null_
uri_userinfo | _null_ uri_userinfo | _null_
uri_host | _null_ uri_host | _null_
@ -197,6 +219,7 @@ uri_query | _null_
uri_fragment | _null_ uri_fragment | _null_
-[ RECORD 14 ]-+---------------------------------------------------------------------------------------- -[ RECORD 14 ]-+----------------------------------------------------------------------------------------
uri | /foobar uri | /foobar
uri_normalize | /foobar
uri_scheme | _null_ uri_scheme | _null_
uri_userinfo | _null_ uri_userinfo | _null_
uri_host | _null_ uri_host | _null_
@ -206,6 +229,42 @@ uri_path | /foobar
uri_path_array | {foobar} uri_path_array | {foobar}
uri_query | _null_ uri_query | _null_
uri_fragment | _null_ uri_fragment | _null_
-[ RECORD 15 ]-+----------------------------------------------------------------------------------------
uri | HTTP://www.EXAMPLE.com/
uri_normalize | http://www.example.com/
uri_scheme | HTTP
uri_userinfo | _null_
uri_host | www.EXAMPLE.com
uri_host_inet | _null_
uri_port | _null_
uri_path | /
uri_path_array | {""}
uri_query | _null_
uri_fragment | _null_
-[ RECORD 16 ]-+----------------------------------------------------------------------------------------
uri | http://www.ex%41mple.com/
uri_normalize | http://www.example.com/
uri_scheme | http
uri_userinfo | _null_
uri_host | www.ex%41mple.com
uri_host_inet | _null_
uri_port | _null_
uri_path | /
uri_path_array | {""}
uri_query | _null_
uri_fragment | _null_
-[ RECORD 17 ]-+----------------------------------------------------------------------------------------
uri | eXAMPLE://a/./b/../b/%63/%7bfoo%7d
uri_normalize | example://a/b/c/%7Bfoo%7D
uri_scheme | eXAMPLE
uri_userinfo | _null_
uri_host | a
uri_host_inet | _null_
uri_port | _null_
uri_path | /./b/../b/%63/%7bfoo%7d
uri_path_array | {.,b,..,b,%63,%7bfoo%7d}
uri_query | _null_
uri_fragment | _null_
\x off \x off
SELECT DISTINCT b FROM test ORDER BY b; SELECT DISTINCT b FROM test ORDER BY b;
@ -214,16 +273,19 @@ SELECT DISTINCT b FROM test ORDER BY b;
/ /
/foobar /foobar
HTTP://www.EXAMPLE.com/
eXAMPLE://a/./b/../b/%63/%7bfoo%7d
foobar foobar
ftp://ftp.gnu.org/gnu/bison ftp://ftp.gnu.org/gnu/bison
http://[1080::8:800:200C:417A]/foo http://[1080::8:800:200C:417A]/foo
http://[FEDC:BA98:7654:3210:FEDC:BA98:7654:3210]:80/index.html http://[FEDC:BA98:7654:3210:FEDC:BA98:7654:3210]:80/index.html
http://admin:password@192.168.0.1 http://admin:password@192.168.0.1
http://host: http://host:
http://www.ex%41mple.com/
http://www.postgresql.org/ http://www.postgresql.org/
http://www.postgresql.org/docs/devel/static/xfunc-sql.html#XFUNC-SQL-FUNCTION-ARGUMENTS http://www.postgresql.org/docs/devel/static/xfunc-sql.html#XFUNC-SQL-FUNCTION-ARGUMENTS
https://duckduckgo.com/?q=postgresql&ia=about https://duckduckgo.com/?q=postgresql&ia=about
mailto:foo@example.com mailto:foo@example.com
ssh://username@review.openstack.org:29418/openstack/nova.git ssh://username@review.openstack.org:29418/openstack/nova.git
(14 rows) (17 rows)

View File

@ -20,6 +20,12 @@ VALUES ('http://www.postgresql.org/'),
('foobar'), ('foobar'),
('/foobar'); ('/foobar');
-- normalization test values from <https://tools.ietf.org/html/rfc3986#section-6.2.2>
INSERT INTO test (b)
VALUES ('HTTP://www.EXAMPLE.com/'),
('http://www.ex%41mple.com/'),
('eXAMPLE://a/./b/../b/%63/%7bfoo%7d');
SELECT * FROM test; SELECT * FROM test;
-- error cases -- error cases
@ -28,6 +34,7 @@ SELECT uri 'http://host:port/';
\x on \x on
SELECT b AS uri, SELECT b AS uri,
uri_normalize(b),
uri_scheme(b), uri_scheme(b),
uri_userinfo(b), uri_userinfo(b),
uri_host(b), uri_host(b),

29
uri.c
View File

@ -277,6 +277,35 @@ uri_path_array(PG_FUNCTION_ARGS)
PG_RETURN_ARRAYTYPE_P(construct_empty_array(TEXTOID)); PG_RETURN_ARRAYTYPE_P(construct_empty_array(TEXTOID));
} }
/*
 * uri_normalize(uri) returns uri
 *
 * Returns the argument after syntax-based normalization as performed by
 * uriparser's uriNormalizeSyntaxA().
 *
 * The input is parsed with parse_uri() (defined elsewhere in this file;
 * presumably it reports an error itself on malformed input -- confirm).
 * Any non-success return code from the uriparser calls below is reported
 * with elog(ERROR).
 *
 * The members of the parsed UriUriA are owned by uriparser and must be
 * released with uriFreeUriMembersA().  elog(ERROR) does not return, so
 * every failure path frees them explicitly before raising the error;
 * otherwise they would never be released.
 */
PG_FUNCTION_INFO_V1(uri_normalize);
Datum
uri_normalize(PG_FUNCTION_ARGS)
{
	Datum		arg = PG_GETARG_DATUM(0);
	char	   *s = TextDatumGetCString(arg);
	UriUriA		uri;
	int			rc;
	int			charsRequired;
	char	   *ret;

	parse_uri(s, &uri);

	if ((rc = uriNormalizeSyntaxA(&uri)) != URI_SUCCESS)
	{
		uriFreeUriMembersA(&uri);
		elog(ERROR, "uriNormalizeSyntaxA() failed: error code %d", rc);
	}

	if ((rc = uriToStringCharsRequiredA(&uri, &charsRequired)) != URI_SUCCESS)
	{
		uriFreeUriMembersA(&uri);
		elog(ERROR, "uriToStringCharsRequiredA() failed: error code %d", rc);
	}

	/* one extra byte for the string terminator */
	charsRequired++;

	ret = palloc(charsRequired);
	if ((rc = uriToStringA(ret, &uri, charsRequired, NULL)) != URI_SUCCESS)
	{
		uriFreeUriMembersA(&uri);
		elog(ERROR, "uriToStringA() failed: error code %d", rc);
	}

	/* ret now holds its own copy of the text, so the parse result can go */
	uriFreeUriMembersA(&uri);

	PG_RETURN_URI_P((uritype *) cstring_to_text(ret));
}
static int static int
cmp_text_range(UriTextRangeA a, UriTextRangeA b) cmp_text_range(UriTextRangeA a, UriTextRangeA b)
{ {

View File

@ -81,6 +81,13 @@ CREATE FUNCTION uri_path_array(uri) RETURNS text[]
AS '$libdir/uri'; AS '$libdir/uri';
-- uri_normalize(uri): returns the URI after syntax-based normalization.
-- Implemented in C by the uri_normalize symbol of the uri shared library.
CREATE FUNCTION uri_normalize(uri) RETURNS uri
    LANGUAGE C
    IMMUTABLE STRICT
    AS '$libdir/uri';
CREATE FUNCTION uri_lt(uri, uri) RETURNS boolean CREATE FUNCTION uri_lt(uri, uri) RETURNS boolean
IMMUTABLE IMMUTABLE
STRICT STRICT