Add uri_normalize function
This commit is contained in:
parent
527cc23bc4
commit
8a02974787
16
README.md
16
README.md
|
@ -110,3 +110,19 @@ A number of functions are provided to extract parts of a URI:
|
|||
|
||||
Extracts the fragment part of a URI (roughly speaking, everything
|
||||
after the `#`). If there is no fragment part, returns null.
|
||||
|
||||
Other functions:
|
||||
|
||||
- `uri_normalize(uri) returns uri`
|
||||
|
||||
Performs syntax-based normalization of the URI. This includes
|
||||
case normalization, percent-encoding normalization, and removing
|
||||
redundant `.` and `..` path segments. See
|
||||
[RFC 3986 section 6.2.2](http://tools.ietf.org/html/rfc3986#section-6.2.2)
|
||||
for the full details.
|
||||
|
||||
Note that this module (and similar modules in other programming
|
||||
languages) compares URIs for equality in their original form,
|
||||
without normalization. If you want to compare URIs
|
||||
without regard for mostly irrelevant syntax differences, pass them
|
||||
through this function.
|
||||
|
|
|
@ -16,6 +16,11 @@ VALUES ('http://www.postgresql.org/'),
|
|||
('/'),
|
||||
('foobar'),
|
||||
('/foobar');
|
||||
-- normalization test values from <https://tools.ietf.org/html/rfc3986#section-6.2.2>
|
||||
INSERT INTO test (b)
|
||||
VALUES ('HTTP://www.EXAMPLE.com/'),
|
||||
('http://www.ex%41mple.com/'),
|
||||
('eXAMPLE://a/./b/../b/%63/%7bfoo%7d');
|
||||
SELECT * FROM test;
|
||||
a | b
|
||||
----+-----------------------------------------------------------------------------------------
|
||||
|
@ -33,7 +38,10 @@ SELECT * FROM test;
|
|||
12 | /
|
||||
13 | foobar
|
||||
14 | /foobar
|
||||
(14 rows)
|
||||
15 | HTTP://www.EXAMPLE.com/
|
||||
16 | http://www.ex%41mple.com/
|
||||
17 | eXAMPLE://a/./b/../b/%63/%7bfoo%7d
|
||||
(17 rows)
|
||||
|
||||
-- error cases
|
||||
SELECT uri 'http://host:port/';
|
||||
|
@ -42,6 +50,7 @@ LINE 1: SELECT uri 'http://host:port/';
|
|||
^
|
||||
\x on
|
||||
SELECT b AS uri,
|
||||
uri_normalize(b),
|
||||
uri_scheme(b),
|
||||
uri_userinfo(b),
|
||||
uri_host(b),
|
||||
|
@ -54,6 +63,7 @@ SELECT b AS uri,
|
|||
FROM test;
|
||||
-[ RECORD 1 ]--+----------------------------------------------------------------------------------------
|
||||
uri | http://www.postgresql.org/
|
||||
uri_normalize | http://www.postgresql.org/
|
||||
uri_scheme | http
|
||||
uri_userinfo | _null_
|
||||
uri_host | www.postgresql.org
|
||||
|
@ -65,6 +75,7 @@ uri_query | _null_
|
|||
uri_fragment | _null_
|
||||
-[ RECORD 2 ]--+----------------------------------------------------------------------------------------
|
||||
uri | http://www.postgresql.org/docs/devel/static/xfunc-sql.html#XFUNC-SQL-FUNCTION-ARGUMENTS
|
||||
uri_normalize | http://www.postgresql.org/docs/devel/static/xfunc-sql.html#XFUNC-SQL-FUNCTION-ARGUMENTS
|
||||
uri_scheme | http
|
||||
uri_userinfo | _null_
|
||||
uri_host | www.postgresql.org
|
||||
|
@ -76,6 +87,7 @@ uri_query | _null_
|
|||
uri_fragment | XFUNC-SQL-FUNCTION-ARGUMENTS
|
||||
-[ RECORD 3 ]--+----------------------------------------------------------------------------------------
|
||||
uri | https://duckduckgo.com/?q=postgresql&ia=about
|
||||
uri_normalize | https://duckduckgo.com/?q=postgresql&ia=about
|
||||
uri_scheme | https
|
||||
uri_userinfo | _null_
|
||||
uri_host | duckduckgo.com
|
||||
|
@ -87,6 +99,7 @@ uri_query | q=postgresql&ia=about
|
|||
uri_fragment | _null_
|
||||
-[ RECORD 4 ]--+----------------------------------------------------------------------------------------
|
||||
uri | ftp://ftp.gnu.org/gnu/bison
|
||||
uri_normalize | ftp://ftp.gnu.org/gnu/bison
|
||||
uri_scheme | ftp
|
||||
uri_userinfo | _null_
|
||||
uri_host | ftp.gnu.org
|
||||
|
@ -98,6 +111,7 @@ uri_query | _null_
|
|||
uri_fragment | _null_
|
||||
-[ RECORD 5 ]--+----------------------------------------------------------------------------------------
|
||||
uri | mailto:foo@example.com
|
||||
uri_normalize | mailto:foo@example.com
|
||||
uri_scheme | mailto
|
||||
uri_userinfo | _null_
|
||||
uri_host | _null_
|
||||
|
@ -109,6 +123,7 @@ uri_query | _null_
|
|||
uri_fragment | _null_
|
||||
-[ RECORD 6 ]--+----------------------------------------------------------------------------------------
|
||||
uri | ssh://username@review.openstack.org:29418/openstack/nova.git
|
||||
uri_normalize | ssh://username@review.openstack.org:29418/openstack/nova.git
|
||||
uri_scheme | ssh
|
||||
uri_userinfo | username
|
||||
uri_host | review.openstack.org
|
||||
|
@ -120,6 +135,7 @@ uri_query | _null_
|
|||
uri_fragment | _null_
|
||||
-[ RECORD 7 ]--+----------------------------------------------------------------------------------------
|
||||
uri | http://admin:password@192.168.0.1
|
||||
uri_normalize | http://admin:password@192.168.0.1
|
||||
uri_scheme | http
|
||||
uri_userinfo | admin:password
|
||||
uri_host | 192.168.0.1
|
||||
|
@ -131,6 +147,7 @@ uri_query | _null_
|
|||
uri_fragment | _null_
|
||||
-[ RECORD 8 ]--+----------------------------------------------------------------------------------------
|
||||
uri | http://[FEDC:BA98:7654:3210:FEDC:BA98:7654:3210]:80/index.html
|
||||
uri_normalize | http://[fedc:ba98:7654:3210:fedc:ba98:7654:3210]:80/index.html
|
||||
uri_scheme | http
|
||||
uri_userinfo | _null_
|
||||
uri_host | FEDC:BA98:7654:3210:FEDC:BA98:7654:3210
|
||||
|
@ -142,6 +159,7 @@ uri_query | _null_
|
|||
uri_fragment | _null_
|
||||
-[ RECORD 9 ]--+----------------------------------------------------------------------------------------
|
||||
uri | http://[1080::8:800:200C:417A]/foo
|
||||
uri_normalize | http://[1080:0000:0000:0000:0008:0800:200c:417a]/foo
|
||||
uri_scheme | http
|
||||
uri_userinfo | _null_
|
||||
uri_host | 1080::8:800:200C:417A
|
||||
|
@ -153,6 +171,7 @@ uri_query | _null_
|
|||
uri_fragment | _null_
|
||||
-[ RECORD 10 ]-+----------------------------------------------------------------------------------------
|
||||
uri | http://host:
|
||||
uri_normalize | http://host:
|
||||
uri_scheme | http
|
||||
uri_userinfo | _null_
|
||||
uri_host | host
|
||||
|
@ -164,6 +183,7 @@ uri_query | _null_
|
|||
uri_fragment | _null_
|
||||
-[ RECORD 11 ]-+----------------------------------------------------------------------------------------
|
||||
uri |
|
||||
uri_normalize |
|
||||
uri_scheme | _null_
|
||||
uri_userinfo | _null_
|
||||
uri_host | _null_
|
||||
|
@ -175,6 +195,7 @@ uri_query | _null_
|
|||
uri_fragment | _null_
|
||||
-[ RECORD 12 ]-+----------------------------------------------------------------------------------------
|
||||
uri | /
|
||||
uri_normalize | /
|
||||
uri_scheme | _null_
|
||||
uri_userinfo | _null_
|
||||
uri_host | _null_
|
||||
|
@ -186,6 +207,7 @@ uri_query | _null_
|
|||
uri_fragment | _null_
|
||||
-[ RECORD 13 ]-+----------------------------------------------------------------------------------------
|
||||
uri | foobar
|
||||
uri_normalize | foobar
|
||||
uri_scheme | _null_
|
||||
uri_userinfo | _null_
|
||||
uri_host | _null_
|
||||
|
@ -197,6 +219,7 @@ uri_query | _null_
|
|||
uri_fragment | _null_
|
||||
-[ RECORD 14 ]-+----------------------------------------------------------------------------------------
|
||||
uri | /foobar
|
||||
uri_normalize | /foobar
|
||||
uri_scheme | _null_
|
||||
uri_userinfo | _null_
|
||||
uri_host | _null_
|
||||
|
@ -206,6 +229,42 @@ uri_path | /foobar
|
|||
uri_path_array | {foobar}
|
||||
uri_query | _null_
|
||||
uri_fragment | _null_
|
||||
-[ RECORD 15 ]-+----------------------------------------------------------------------------------------
|
||||
uri | HTTP://www.EXAMPLE.com/
|
||||
uri_normalize | http://www.example.com/
|
||||
uri_scheme | HTTP
|
||||
uri_userinfo | _null_
|
||||
uri_host | www.EXAMPLE.com
|
||||
uri_host_inet | _null_
|
||||
uri_port | _null_
|
||||
uri_path | /
|
||||
uri_path_array | {""}
|
||||
uri_query | _null_
|
||||
uri_fragment | _null_
|
||||
-[ RECORD 16 ]-+----------------------------------------------------------------------------------------
|
||||
uri | http://www.ex%41mple.com/
|
||||
uri_normalize | http://www.example.com/
|
||||
uri_scheme | http
|
||||
uri_userinfo | _null_
|
||||
uri_host | www.ex%41mple.com
|
||||
uri_host_inet | _null_
|
||||
uri_port | _null_
|
||||
uri_path | /
|
||||
uri_path_array | {""}
|
||||
uri_query | _null_
|
||||
uri_fragment | _null_
|
||||
-[ RECORD 17 ]-+----------------------------------------------------------------------------------------
|
||||
uri | eXAMPLE://a/./b/../b/%63/%7bfoo%7d
|
||||
uri_normalize | example://a/b/c/%7Bfoo%7D
|
||||
uri_scheme | eXAMPLE
|
||||
uri_userinfo | _null_
|
||||
uri_host | a
|
||||
uri_host_inet | _null_
|
||||
uri_port | _null_
|
||||
uri_path | /./b/../b/%63/%7bfoo%7d
|
||||
uri_path_array | {.,b,..,b,%63,%7bfoo%7d}
|
||||
uri_query | _null_
|
||||
uri_fragment | _null_
|
||||
|
||||
\x off
|
||||
SELECT DISTINCT b FROM test ORDER BY b;
|
||||
|
@ -214,16 +273,19 @@ SELECT DISTINCT b FROM test ORDER BY b;
|
|||
|
||||
/
|
||||
/foobar
|
||||
HTTP://www.EXAMPLE.com/
|
||||
eXAMPLE://a/./b/../b/%63/%7bfoo%7d
|
||||
foobar
|
||||
ftp://ftp.gnu.org/gnu/bison
|
||||
http://[1080::8:800:200C:417A]/foo
|
||||
http://[FEDC:BA98:7654:3210:FEDC:BA98:7654:3210]:80/index.html
|
||||
http://admin:password@192.168.0.1
|
||||
http://host:
|
||||
http://www.ex%41mple.com/
|
||||
http://www.postgresql.org/
|
||||
http://www.postgresql.org/docs/devel/static/xfunc-sql.html#XFUNC-SQL-FUNCTION-ARGUMENTS
|
||||
https://duckduckgo.com/?q=postgresql&ia=about
|
||||
mailto:foo@example.com
|
||||
ssh://username@review.openstack.org:29418/openstack/nova.git
|
||||
(14 rows)
|
||||
(17 rows)
|
||||
|
||||
|
|
|
@ -20,6 +20,12 @@ VALUES ('http://www.postgresql.org/'),
|
|||
('foobar'),
|
||||
('/foobar');
|
||||
|
||||
-- normalization test values from <https://tools.ietf.org/html/rfc3986#section-6.2.2>
|
||||
INSERT INTO test (b)
|
||||
VALUES ('HTTP://www.EXAMPLE.com/'),
|
||||
('http://www.ex%41mple.com/'),
|
||||
('eXAMPLE://a/./b/../b/%63/%7bfoo%7d');
|
||||
|
||||
SELECT * FROM test;
|
||||
|
||||
-- error cases
|
||||
|
@ -28,6 +34,7 @@ SELECT uri 'http://host:port/';
|
|||
|
||||
\x on
|
||||
SELECT b AS uri,
|
||||
uri_normalize(b),
|
||||
uri_scheme(b),
|
||||
uri_userinfo(b),
|
||||
uri_host(b),
|
||||
|
|
29
uri.c
29
uri.c
|
@ -277,6 +277,35 @@ uri_path_array(PG_FUNCTION_ARGS)
|
|||
PG_RETURN_ARRAYTYPE_P(construct_empty_array(TEXTOID));
|
||||
}
|
||||
|
||||
PG_FUNCTION_INFO_V1(uri_normalize);
|
||||
Datum
|
||||
uri_normalize(PG_FUNCTION_ARGS)
|
||||
{
|
||||
Datum arg = PG_GETARG_DATUM(0);
|
||||
char *s = TextDatumGetCString(arg);
|
||||
UriUriA uri;
|
||||
int rc;
|
||||
int charsRequired;
|
||||
char *ret;
|
||||
|
||||
parse_uri(s, &uri);
|
||||
|
||||
if ((rc = uriNormalizeSyntaxA(&uri)) != URI_SUCCESS)
|
||||
elog(ERROR, "uriNormalizeSyntaxA() failed: error code %d", rc);
|
||||
|
||||
if ((rc = uriToStringCharsRequiredA(&uri, &charsRequired)) != URI_SUCCESS)
|
||||
elog(ERROR, "uriToStringCharsRequiredA() failed: error code %d", rc);
|
||||
charsRequired++;
|
||||
|
||||
ret = palloc(charsRequired);
|
||||
if ((rc = uriToStringA(ret, &uri, charsRequired, NULL)) != URI_SUCCESS)
|
||||
elog(ERROR, "uriToStringA() failed: error code %d", rc);
|
||||
|
||||
uriFreeUriMembersA(&uri);
|
||||
|
||||
PG_RETURN_URI_P((uritype *) cstring_to_text(ret));
|
||||
}
|
||||
|
||||
static int
|
||||
cmp_text_range(UriTextRangeA a, UriTextRangeA b)
|
||||
{
|
||||
|
|
Loading…
Reference in New Issue