Add uri_normalize function

This commit is contained in:
Peter Eisentraut 2015-04-12 16:01:03 -04:00
parent 527cc23bc4
commit 8a02974787
5 changed files with 123 additions and 2 deletions

View File

@ -110,3 +110,19 @@ A number of functions are provided to extract parts of a URI:
Extracts the fragment part of a URI (roughly speaking, everything
after the `#`). If there is no fragment part, returns null.
Other functions:
- `uri_normalize(uri) returns uri`
Performs syntax-based normalization of the URI. This includes
case normalization, percent-encoding normalization, and removing
redundant `.` and `..` path segments. See
[RFC 3986 section 6.2.2](http://tools.ietf.org/html/rfc3986#section-6.2.2)
for the full details.
Note that this module (and similar modules in other programming
languages) compares URIs for equality in their original form,
without normalization. If you want to compare or deduplicate URIs
without regard for mostly irrelevant syntax differences, pass them
through this function first.

View File

@ -16,6 +16,11 @@ VALUES ('http://www.postgresql.org/'),
('/'),
('foobar'),
('/foobar');
-- normalization test values from <https://tools.ietf.org/html/rfc3986#section-6.2.2>
INSERT INTO test (b)
VALUES ('HTTP://www.EXAMPLE.com/'),
('http://www.ex%41mple.com/'),
('eXAMPLE://a/./b/../b/%63/%7bfoo%7d');
SELECT * FROM test;
a | b
----+-----------------------------------------------------------------------------------------
@ -33,7 +38,10 @@ SELECT * FROM test;
12 | /
13 | foobar
14 | /foobar
(14 rows)
15 | HTTP://www.EXAMPLE.com/
16 | http://www.ex%41mple.com/
17 | eXAMPLE://a/./b/../b/%63/%7bfoo%7d
(17 rows)
-- error cases
SELECT uri 'http://host:port/';
@ -42,6 +50,7 @@ LINE 1: SELECT uri 'http://host:port/';
^
\x on
SELECT b AS uri,
uri_normalize(b),
uri_scheme(b),
uri_userinfo(b),
uri_host(b),
@ -54,6 +63,7 @@ SELECT b AS uri,
FROM test;
-[ RECORD 1 ]--+----------------------------------------------------------------------------------------
uri | http://www.postgresql.org/
uri_normalize | http://www.postgresql.org/
uri_scheme | http
uri_userinfo | _null_
uri_host | www.postgresql.org
@ -65,6 +75,7 @@ uri_query | _null_
uri_fragment | _null_
-[ RECORD 2 ]--+----------------------------------------------------------------------------------------
uri | http://www.postgresql.org/docs/devel/static/xfunc-sql.html#XFUNC-SQL-FUNCTION-ARGUMENTS
uri_normalize | http://www.postgresql.org/docs/devel/static/xfunc-sql.html#XFUNC-SQL-FUNCTION-ARGUMENTS
uri_scheme | http
uri_userinfo | _null_
uri_host | www.postgresql.org
@ -76,6 +87,7 @@ uri_query | _null_
uri_fragment | XFUNC-SQL-FUNCTION-ARGUMENTS
-[ RECORD 3 ]--+----------------------------------------------------------------------------------------
uri | https://duckduckgo.com/?q=postgresql&ia=about
uri_normalize | https://duckduckgo.com/?q=postgresql&ia=about
uri_scheme | https
uri_userinfo | _null_
uri_host | duckduckgo.com
@ -87,6 +99,7 @@ uri_query | q=postgresql&ia=about
uri_fragment | _null_
-[ RECORD 4 ]--+----------------------------------------------------------------------------------------
uri | ftp://ftp.gnu.org/gnu/bison
uri_normalize | ftp://ftp.gnu.org/gnu/bison
uri_scheme | ftp
uri_userinfo | _null_
uri_host | ftp.gnu.org
@ -98,6 +111,7 @@ uri_query | _null_
uri_fragment | _null_
-[ RECORD 5 ]--+----------------------------------------------------------------------------------------
uri | mailto:foo@example.com
uri_normalize | mailto:foo@example.com
uri_scheme | mailto
uri_userinfo | _null_
uri_host | _null_
@ -109,6 +123,7 @@ uri_query | _null_
uri_fragment | _null_
-[ RECORD 6 ]--+----------------------------------------------------------------------------------------
uri | ssh://username@review.openstack.org:29418/openstack/nova.git
uri_normalize | ssh://username@review.openstack.org:29418/openstack/nova.git
uri_scheme | ssh
uri_userinfo | username
uri_host | review.openstack.org
@ -120,6 +135,7 @@ uri_query | _null_
uri_fragment | _null_
-[ RECORD 7 ]--+----------------------------------------------------------------------------------------
uri | http://admin:password@192.168.0.1
uri_normalize | http://admin:password@192.168.0.1
uri_scheme | http
uri_userinfo | admin:password
uri_host | 192.168.0.1
@ -131,6 +147,7 @@ uri_query | _null_
uri_fragment | _null_
-[ RECORD 8 ]--+----------------------------------------------------------------------------------------
uri | http://[FEDC:BA98:7654:3210:FEDC:BA98:7654:3210]:80/index.html
uri_normalize | http://[fedc:ba98:7654:3210:fedc:ba98:7654:3210]:80/index.html
uri_scheme | http
uri_userinfo | _null_
uri_host | FEDC:BA98:7654:3210:FEDC:BA98:7654:3210
@ -142,6 +159,7 @@ uri_query | _null_
uri_fragment | _null_
-[ RECORD 9 ]--+----------------------------------------------------------------------------------------
uri | http://[1080::8:800:200C:417A]/foo
uri_normalize | http://[1080:0000:0000:0000:0008:0800:200c:417a]/foo
uri_scheme | http
uri_userinfo | _null_
uri_host | 1080::8:800:200C:417A
@ -153,6 +171,7 @@ uri_query | _null_
uri_fragment | _null_
-[ RECORD 10 ]-+----------------------------------------------------------------------------------------
uri | http://host:
uri_normalize | http://host:
uri_scheme | http
uri_userinfo | _null_
uri_host | host
@ -164,6 +183,7 @@ uri_query | _null_
uri_fragment | _null_
-[ RECORD 11 ]-+----------------------------------------------------------------------------------------
uri |
uri_normalize |
uri_scheme | _null_
uri_userinfo | _null_
uri_host | _null_
@ -175,6 +195,7 @@ uri_query | _null_
uri_fragment | _null_
-[ RECORD 12 ]-+----------------------------------------------------------------------------------------
uri | /
uri_normalize | /
uri_scheme | _null_
uri_userinfo | _null_
uri_host | _null_
@ -186,6 +207,7 @@ uri_query | _null_
uri_fragment | _null_
-[ RECORD 13 ]-+----------------------------------------------------------------------------------------
uri | foobar
uri_normalize | foobar
uri_scheme | _null_
uri_userinfo | _null_
uri_host | _null_
@ -197,6 +219,7 @@ uri_query | _null_
uri_fragment | _null_
-[ RECORD 14 ]-+----------------------------------------------------------------------------------------
uri | /foobar
uri_normalize | /foobar
uri_scheme | _null_
uri_userinfo | _null_
uri_host | _null_
@ -206,6 +229,42 @@ uri_path | /foobar
uri_path_array | {foobar}
uri_query | _null_
uri_fragment | _null_
-[ RECORD 15 ]-+----------------------------------------------------------------------------------------
uri | HTTP://www.EXAMPLE.com/
uri_normalize | http://www.example.com/
uri_scheme | HTTP
uri_userinfo | _null_
uri_host | www.EXAMPLE.com
uri_host_inet | _null_
uri_port | _null_
uri_path | /
uri_path_array | {""}
uri_query | _null_
uri_fragment | _null_
-[ RECORD 16 ]-+----------------------------------------------------------------------------------------
uri | http://www.ex%41mple.com/
uri_normalize | http://www.example.com/
uri_scheme | http
uri_userinfo | _null_
uri_host | www.ex%41mple.com
uri_host_inet | _null_
uri_port | _null_
uri_path | /
uri_path_array | {""}
uri_query | _null_
uri_fragment | _null_
-[ RECORD 17 ]-+----------------------------------------------------------------------------------------
uri | eXAMPLE://a/./b/../b/%63/%7bfoo%7d
uri_normalize | example://a/b/c/%7Bfoo%7D
uri_scheme | eXAMPLE
uri_userinfo | _null_
uri_host | a
uri_host_inet | _null_
uri_port | _null_
uri_path | /./b/../b/%63/%7bfoo%7d
uri_path_array | {.,b,..,b,%63,%7bfoo%7d}
uri_query | _null_
uri_fragment | _null_
\x off
SELECT DISTINCT b FROM test ORDER BY b;
@ -214,16 +273,19 @@ SELECT DISTINCT b FROM test ORDER BY b;
/
/foobar
HTTP://www.EXAMPLE.com/
eXAMPLE://a/./b/../b/%63/%7bfoo%7d
foobar
ftp://ftp.gnu.org/gnu/bison
http://[1080::8:800:200C:417A]/foo
http://[FEDC:BA98:7654:3210:FEDC:BA98:7654:3210]:80/index.html
http://admin:password@192.168.0.1
http://host:
http://www.ex%41mple.com/
http://www.postgresql.org/
http://www.postgresql.org/docs/devel/static/xfunc-sql.html#XFUNC-SQL-FUNCTION-ARGUMENTS
https://duckduckgo.com/?q=postgresql&ia=about
mailto:foo@example.com
ssh://username@review.openstack.org:29418/openstack/nova.git
(14 rows)
(17 rows)

View File

@ -20,6 +20,12 @@ VALUES ('http://www.postgresql.org/'),
('foobar'),
('/foobar');
-- normalization test values from <https://tools.ietf.org/html/rfc3986#section-6.2.2>
INSERT INTO test (b)
VALUES ('HTTP://www.EXAMPLE.com/'),
('http://www.ex%41mple.com/'),
('eXAMPLE://a/./b/../b/%63/%7bfoo%7d');
SELECT * FROM test;
-- error cases
@ -28,6 +34,7 @@ SELECT uri 'http://host:port/';
\x on
SELECT b AS uri,
uri_normalize(b),
uri_scheme(b),
uri_userinfo(b),
uri_host(b),

29
uri.c
View File

@ -277,6 +277,35 @@ uri_path_array(PG_FUNCTION_ARGS)
PG_RETURN_ARRAYTYPE_P(construct_empty_array(TEXTOID));
}
/*
 * uri_normalize(uri) returns uri
 *
 * Parses the argument, applies uriparser's syntax-based normalization
 * (uriNormalizeSyntaxA), serializes the result back to a string and returns
 * it as a new uri value.
 *
 * Raises ERROR if normalization or serialization reports a failure code.
 *
 * elog(ERROR) does not return to the caller, so the members of "uri" are
 * released explicitly before each error report; otherwise the
 * uriFreeUriMembersA() call at the end of the function would be skipped and
 * whatever uriparser allocated for this URI would be leaked.
 */
PG_FUNCTION_INFO_V1(uri_normalize);
Datum
uri_normalize(PG_FUNCTION_ARGS)
{
	Datum		arg = PG_GETARG_DATUM(0);
	char	   *s = TextDatumGetCString(arg);
	UriUriA		uri;
	int			rc;
	int			charsRequired;
	char	   *ret;

	parse_uri(s, &uri);

	if ((rc = uriNormalizeSyntaxA(&uri)) != URI_SUCCESS)
	{
		uriFreeUriMembersA(&uri);
		elog(ERROR, "uriNormalizeSyntaxA() failed: error code %d", rc);
	}

	if ((rc = uriToStringCharsRequiredA(&uri, &charsRequired)) != URI_SUCCESS)
	{
		uriFreeUriMembersA(&uri);
		elog(ERROR, "uriToStringCharsRequiredA() failed: error code %d", rc);
	}

	/* make room for the string terminator in addition to the reported size */
	charsRequired++;

	/*
	 * NOTE(review): an out-of-memory ERROR raised by palloc() itself would
	 * still bypass the cleanup below; handling that would need PG_TRY.
	 */
	ret = palloc(charsRequired);

	if ((rc = uriToStringA(ret, &uri, charsRequired, NULL)) != URI_SUCCESS)
	{
		uriFreeUriMembersA(&uri);
		elog(ERROR, "uriToStringA() failed: error code %d", rc);
	}

	/* "ret" is an independent copy, so the parsed URI can be released now */
	uriFreeUriMembersA(&uri);

	PG_RETURN_URI_P((uritype *) cstring_to_text(ret));
}
static int
cmp_text_range(UriTextRangeA a, UriTextRangeA b)
{

View File

@ -81,6 +81,13 @@ CREATE FUNCTION uri_path_array(uri) RETURNS text[]
AS '$libdir/uri';
-- uri_normalize(uri): SQL-level declaration of the C-language function
-- loaded from the uri shared library.  Declared IMMUTABLE and STRICT;
-- with STRICT, a null argument yields null without calling the C code.
CREATE FUNCTION uri_normalize(uri) RETURNS uri
IMMUTABLE
STRICT
LANGUAGE C
AS '$libdir/uri';
CREATE FUNCTION uri_lt(uri, uri) RETURNS boolean
IMMUTABLE
STRICT