Add uri_normalize function
This commit is contained in:
parent
527cc23bc4
commit
8a02974787
16
README.md
16
README.md
|
@ -110,3 +110,19 @@ A number of functions are provided to extract parts of a URI:
|
||||||
|
|
||||||
Extracts the fragment part of a URI (roughly speaking, everything
|
Extracts the fragment part of a URI (roughly speaking, everything
|
||||||
after the `#`). If there is no fragment part, returns null.
|
after the `#`). If there is no fragment part, returns null.
|
||||||
|
|
||||||
|
Other functions:
|
||||||
|
|
||||||
|
- `uri_normalize(uri) returns uri`
|
||||||
|
|
||||||
|
Performs syntax-based normalization of the URI. This includes
|
||||||
|
case normalization, percent-encoding normalization, and removing
|
||||||
|
redundant `.` and `..` path segments. See
|
||||||
|
[RFC 3986 section 6.2.2](http://tools.ietf.org/html/rfc3986#section-6.2.2)
|
||||||
|
for the full details.
|
||||||
|
|
||||||
|
Note that this module (and similar modules in other programming
|
||||||
|
languages) compares URIs for equality in their original form,
|
||||||
|
without normalization. If you want to compare or deduplicate URIs
|
||||||
|
without regard for mostly irrelevant syntax differences, pass them
|
||||||
|
through this function.
|
||||||
|
|
|
@ -16,6 +16,11 @@ VALUES ('http://www.postgresql.org/'),
|
||||||
('/'),
|
('/'),
|
||||||
('foobar'),
|
('foobar'),
|
||||||
('/foobar');
|
('/foobar');
|
||||||
|
-- normalization test values from <https://tools.ietf.org/html/rfc3986#section-6.2.2>
|
||||||
|
INSERT INTO test (b)
|
||||||
|
VALUES ('HTTP://www.EXAMPLE.com/'),
|
||||||
|
('http://www.ex%41mple.com/'),
|
||||||
|
('eXAMPLE://a/./b/../b/%63/%7bfoo%7d');
|
||||||
SELECT * FROM test;
|
SELECT * FROM test;
|
||||||
a | b
|
a | b
|
||||||
----+-----------------------------------------------------------------------------------------
|
----+-----------------------------------------------------------------------------------------
|
||||||
|
@ -33,7 +38,10 @@ SELECT * FROM test;
|
||||||
12 | /
|
12 | /
|
||||||
13 | foobar
|
13 | foobar
|
||||||
14 | /foobar
|
14 | /foobar
|
||||||
(14 rows)
|
15 | HTTP://www.EXAMPLE.com/
|
||||||
|
16 | http://www.ex%41mple.com/
|
||||||
|
17 | eXAMPLE://a/./b/../b/%63/%7bfoo%7d
|
||||||
|
(17 rows)
|
||||||
|
|
||||||
-- error cases
|
-- error cases
|
||||||
SELECT uri 'http://host:port/';
|
SELECT uri 'http://host:port/';
|
||||||
|
@ -42,6 +50,7 @@ LINE 1: SELECT uri 'http://host:port/';
|
||||||
^
|
^
|
||||||
\x on
|
\x on
|
||||||
SELECT b AS uri,
|
SELECT b AS uri,
|
||||||
|
uri_normalize(b),
|
||||||
uri_scheme(b),
|
uri_scheme(b),
|
||||||
uri_userinfo(b),
|
uri_userinfo(b),
|
||||||
uri_host(b),
|
uri_host(b),
|
||||||
|
@ -54,6 +63,7 @@ SELECT b AS uri,
|
||||||
FROM test;
|
FROM test;
|
||||||
-[ RECORD 1 ]--+----------------------------------------------------------------------------------------
|
-[ RECORD 1 ]--+----------------------------------------------------------------------------------------
|
||||||
uri | http://www.postgresql.org/
|
uri | http://www.postgresql.org/
|
||||||
|
uri_normalize | http://www.postgresql.org/
|
||||||
uri_scheme | http
|
uri_scheme | http
|
||||||
uri_userinfo | _null_
|
uri_userinfo | _null_
|
||||||
uri_host | www.postgresql.org
|
uri_host | www.postgresql.org
|
||||||
|
@ -65,6 +75,7 @@ uri_query | _null_
|
||||||
uri_fragment | _null_
|
uri_fragment | _null_
|
||||||
-[ RECORD 2 ]--+----------------------------------------------------------------------------------------
|
-[ RECORD 2 ]--+----------------------------------------------------------------------------------------
|
||||||
uri | http://www.postgresql.org/docs/devel/static/xfunc-sql.html#XFUNC-SQL-FUNCTION-ARGUMENTS
|
uri | http://www.postgresql.org/docs/devel/static/xfunc-sql.html#XFUNC-SQL-FUNCTION-ARGUMENTS
|
||||||
|
uri_normalize | http://www.postgresql.org/docs/devel/static/xfunc-sql.html#XFUNC-SQL-FUNCTION-ARGUMENTS
|
||||||
uri_scheme | http
|
uri_scheme | http
|
||||||
uri_userinfo | _null_
|
uri_userinfo | _null_
|
||||||
uri_host | www.postgresql.org
|
uri_host | www.postgresql.org
|
||||||
|
@ -76,6 +87,7 @@ uri_query | _null_
|
||||||
uri_fragment | XFUNC-SQL-FUNCTION-ARGUMENTS
|
uri_fragment | XFUNC-SQL-FUNCTION-ARGUMENTS
|
||||||
-[ RECORD 3 ]--+----------------------------------------------------------------------------------------
|
-[ RECORD 3 ]--+----------------------------------------------------------------------------------------
|
||||||
uri | https://duckduckgo.com/?q=postgresql&ia=about
|
uri | https://duckduckgo.com/?q=postgresql&ia=about
|
||||||
|
uri_normalize | https://duckduckgo.com/?q=postgresql&ia=about
|
||||||
uri_scheme | https
|
uri_scheme | https
|
||||||
uri_userinfo | _null_
|
uri_userinfo | _null_
|
||||||
uri_host | duckduckgo.com
|
uri_host | duckduckgo.com
|
||||||
|
@ -87,6 +99,7 @@ uri_query | q=postgresql&ia=about
|
||||||
uri_fragment | _null_
|
uri_fragment | _null_
|
||||||
-[ RECORD 4 ]--+----------------------------------------------------------------------------------------
|
-[ RECORD 4 ]--+----------------------------------------------------------------------------------------
|
||||||
uri | ftp://ftp.gnu.org/gnu/bison
|
uri | ftp://ftp.gnu.org/gnu/bison
|
||||||
|
uri_normalize | ftp://ftp.gnu.org/gnu/bison
|
||||||
uri_scheme | ftp
|
uri_scheme | ftp
|
||||||
uri_userinfo | _null_
|
uri_userinfo | _null_
|
||||||
uri_host | ftp.gnu.org
|
uri_host | ftp.gnu.org
|
||||||
|
@ -98,6 +111,7 @@ uri_query | _null_
|
||||||
uri_fragment | _null_
|
uri_fragment | _null_
|
||||||
-[ RECORD 5 ]--+----------------------------------------------------------------------------------------
|
-[ RECORD 5 ]--+----------------------------------------------------------------------------------------
|
||||||
uri | mailto:foo@example.com
|
uri | mailto:foo@example.com
|
||||||
|
uri_normalize | mailto:foo@example.com
|
||||||
uri_scheme | mailto
|
uri_scheme | mailto
|
||||||
uri_userinfo | _null_
|
uri_userinfo | _null_
|
||||||
uri_host | _null_
|
uri_host | _null_
|
||||||
|
@ -109,6 +123,7 @@ uri_query | _null_
|
||||||
uri_fragment | _null_
|
uri_fragment | _null_
|
||||||
-[ RECORD 6 ]--+----------------------------------------------------------------------------------------
|
-[ RECORD 6 ]--+----------------------------------------------------------------------------------------
|
||||||
uri | ssh://username@review.openstack.org:29418/openstack/nova.git
|
uri | ssh://username@review.openstack.org:29418/openstack/nova.git
|
||||||
|
uri_normalize | ssh://username@review.openstack.org:29418/openstack/nova.git
|
||||||
uri_scheme | ssh
|
uri_scheme | ssh
|
||||||
uri_userinfo | username
|
uri_userinfo | username
|
||||||
uri_host | review.openstack.org
|
uri_host | review.openstack.org
|
||||||
|
@ -120,6 +135,7 @@ uri_query | _null_
|
||||||
uri_fragment | _null_
|
uri_fragment | _null_
|
||||||
-[ RECORD 7 ]--+----------------------------------------------------------------------------------------
|
-[ RECORD 7 ]--+----------------------------------------------------------------------------------------
|
||||||
uri | http://admin:password@192.168.0.1
|
uri | http://admin:password@192.168.0.1
|
||||||
|
uri_normalize | http://admin:password@192.168.0.1
|
||||||
uri_scheme | http
|
uri_scheme | http
|
||||||
uri_userinfo | admin:password
|
uri_userinfo | admin:password
|
||||||
uri_host | 192.168.0.1
|
uri_host | 192.168.0.1
|
||||||
|
@ -131,6 +147,7 @@ uri_query | _null_
|
||||||
uri_fragment | _null_
|
uri_fragment | _null_
|
||||||
-[ RECORD 8 ]--+----------------------------------------------------------------------------------------
|
-[ RECORD 8 ]--+----------------------------------------------------------------------------------------
|
||||||
uri | http://[FEDC:BA98:7654:3210:FEDC:BA98:7654:3210]:80/index.html
|
uri | http://[FEDC:BA98:7654:3210:FEDC:BA98:7654:3210]:80/index.html
|
||||||
|
uri_normalize | http://[fedc:ba98:7654:3210:fedc:ba98:7654:3210]:80/index.html
|
||||||
uri_scheme | http
|
uri_scheme | http
|
||||||
uri_userinfo | _null_
|
uri_userinfo | _null_
|
||||||
uri_host | FEDC:BA98:7654:3210:FEDC:BA98:7654:3210
|
uri_host | FEDC:BA98:7654:3210:FEDC:BA98:7654:3210
|
||||||
|
@ -142,6 +159,7 @@ uri_query | _null_
|
||||||
uri_fragment | _null_
|
uri_fragment | _null_
|
||||||
-[ RECORD 9 ]--+----------------------------------------------------------------------------------------
|
-[ RECORD 9 ]--+----------------------------------------------------------------------------------------
|
||||||
uri | http://[1080::8:800:200C:417A]/foo
|
uri | http://[1080::8:800:200C:417A]/foo
|
||||||
|
uri_normalize | http://[1080:0000:0000:0000:0008:0800:200c:417a]/foo
|
||||||
uri_scheme | http
|
uri_scheme | http
|
||||||
uri_userinfo | _null_
|
uri_userinfo | _null_
|
||||||
uri_host | 1080::8:800:200C:417A
|
uri_host | 1080::8:800:200C:417A
|
||||||
|
@ -153,6 +171,7 @@ uri_query | _null_
|
||||||
uri_fragment | _null_
|
uri_fragment | _null_
|
||||||
-[ RECORD 10 ]-+----------------------------------------------------------------------------------------
|
-[ RECORD 10 ]-+----------------------------------------------------------------------------------------
|
||||||
uri | http://host:
|
uri | http://host:
|
||||||
|
uri_normalize | http://host:
|
||||||
uri_scheme | http
|
uri_scheme | http
|
||||||
uri_userinfo | _null_
|
uri_userinfo | _null_
|
||||||
uri_host | host
|
uri_host | host
|
||||||
|
@ -164,6 +183,7 @@ uri_query | _null_
|
||||||
uri_fragment | _null_
|
uri_fragment | _null_
|
||||||
-[ RECORD 11 ]-+----------------------------------------------------------------------------------------
|
-[ RECORD 11 ]-+----------------------------------------------------------------------------------------
|
||||||
uri |
|
uri |
|
||||||
|
uri_normalize |
|
||||||
uri_scheme | _null_
|
uri_scheme | _null_
|
||||||
uri_userinfo | _null_
|
uri_userinfo | _null_
|
||||||
uri_host | _null_
|
uri_host | _null_
|
||||||
|
@ -175,6 +195,7 @@ uri_query | _null_
|
||||||
uri_fragment | _null_
|
uri_fragment | _null_
|
||||||
-[ RECORD 12 ]-+----------------------------------------------------------------------------------------
|
-[ RECORD 12 ]-+----------------------------------------------------------------------------------------
|
||||||
uri | /
|
uri | /
|
||||||
|
uri_normalize | /
|
||||||
uri_scheme | _null_
|
uri_scheme | _null_
|
||||||
uri_userinfo | _null_
|
uri_userinfo | _null_
|
||||||
uri_host | _null_
|
uri_host | _null_
|
||||||
|
@ -186,6 +207,7 @@ uri_query | _null_
|
||||||
uri_fragment | _null_
|
uri_fragment | _null_
|
||||||
-[ RECORD 13 ]-+----------------------------------------------------------------------------------------
|
-[ RECORD 13 ]-+----------------------------------------------------------------------------------------
|
||||||
uri | foobar
|
uri | foobar
|
||||||
|
uri_normalize | foobar
|
||||||
uri_scheme | _null_
|
uri_scheme | _null_
|
||||||
uri_userinfo | _null_
|
uri_userinfo | _null_
|
||||||
uri_host | _null_
|
uri_host | _null_
|
||||||
|
@ -197,6 +219,7 @@ uri_query | _null_
|
||||||
uri_fragment | _null_
|
uri_fragment | _null_
|
||||||
-[ RECORD 14 ]-+----------------------------------------------------------------------------------------
|
-[ RECORD 14 ]-+----------------------------------------------------------------------------------------
|
||||||
uri | /foobar
|
uri | /foobar
|
||||||
|
uri_normalize | /foobar
|
||||||
uri_scheme | _null_
|
uri_scheme | _null_
|
||||||
uri_userinfo | _null_
|
uri_userinfo | _null_
|
||||||
uri_host | _null_
|
uri_host | _null_
|
||||||
|
@ -206,6 +229,42 @@ uri_path | /foobar
|
||||||
uri_path_array | {foobar}
|
uri_path_array | {foobar}
|
||||||
uri_query | _null_
|
uri_query | _null_
|
||||||
uri_fragment | _null_
|
uri_fragment | _null_
|
||||||
|
-[ RECORD 15 ]-+----------------------------------------------------------------------------------------
|
||||||
|
uri | HTTP://www.EXAMPLE.com/
|
||||||
|
uri_normalize | http://www.example.com/
|
||||||
|
uri_scheme | HTTP
|
||||||
|
uri_userinfo | _null_
|
||||||
|
uri_host | www.EXAMPLE.com
|
||||||
|
uri_host_inet | _null_
|
||||||
|
uri_port | _null_
|
||||||
|
uri_path | /
|
||||||
|
uri_path_array | {""}
|
||||||
|
uri_query | _null_
|
||||||
|
uri_fragment | _null_
|
||||||
|
-[ RECORD 16 ]-+----------------------------------------------------------------------------------------
|
||||||
|
uri | http://www.ex%41mple.com/
|
||||||
|
uri_normalize | http://www.example.com/
|
||||||
|
uri_scheme | http
|
||||||
|
uri_userinfo | _null_
|
||||||
|
uri_host | www.ex%41mple.com
|
||||||
|
uri_host_inet | _null_
|
||||||
|
uri_port | _null_
|
||||||
|
uri_path | /
|
||||||
|
uri_path_array | {""}
|
||||||
|
uri_query | _null_
|
||||||
|
uri_fragment | _null_
|
||||||
|
-[ RECORD 17 ]-+----------------------------------------------------------------------------------------
|
||||||
|
uri | eXAMPLE://a/./b/../b/%63/%7bfoo%7d
|
||||||
|
uri_normalize | example://a/b/c/%7Bfoo%7D
|
||||||
|
uri_scheme | eXAMPLE
|
||||||
|
uri_userinfo | _null_
|
||||||
|
uri_host | a
|
||||||
|
uri_host_inet | _null_
|
||||||
|
uri_port | _null_
|
||||||
|
uri_path | /./b/../b/%63/%7bfoo%7d
|
||||||
|
uri_path_array | {.,b,..,b,%63,%7bfoo%7d}
|
||||||
|
uri_query | _null_
|
||||||
|
uri_fragment | _null_
|
||||||
|
|
||||||
\x off
|
\x off
|
||||||
SELECT DISTINCT b FROM test ORDER BY b;
|
SELECT DISTINCT b FROM test ORDER BY b;
|
||||||
|
@ -214,16 +273,19 @@ SELECT DISTINCT b FROM test ORDER BY b;
|
||||||
|
|
||||||
/
|
/
|
||||||
/foobar
|
/foobar
|
||||||
|
HTTP://www.EXAMPLE.com/
|
||||||
|
eXAMPLE://a/./b/../b/%63/%7bfoo%7d
|
||||||
foobar
|
foobar
|
||||||
ftp://ftp.gnu.org/gnu/bison
|
ftp://ftp.gnu.org/gnu/bison
|
||||||
http://[1080::8:800:200C:417A]/foo
|
http://[1080::8:800:200C:417A]/foo
|
||||||
http://[FEDC:BA98:7654:3210:FEDC:BA98:7654:3210]:80/index.html
|
http://[FEDC:BA98:7654:3210:FEDC:BA98:7654:3210]:80/index.html
|
||||||
http://admin:password@192.168.0.1
|
http://admin:password@192.168.0.1
|
||||||
http://host:
|
http://host:
|
||||||
|
http://www.ex%41mple.com/
|
||||||
http://www.postgresql.org/
|
http://www.postgresql.org/
|
||||||
http://www.postgresql.org/docs/devel/static/xfunc-sql.html#XFUNC-SQL-FUNCTION-ARGUMENTS
|
http://www.postgresql.org/docs/devel/static/xfunc-sql.html#XFUNC-SQL-FUNCTION-ARGUMENTS
|
||||||
https://duckduckgo.com/?q=postgresql&ia=about
|
https://duckduckgo.com/?q=postgresql&ia=about
|
||||||
mailto:foo@example.com
|
mailto:foo@example.com
|
||||||
ssh://username@review.openstack.org:29418/openstack/nova.git
|
ssh://username@review.openstack.org:29418/openstack/nova.git
|
||||||
(14 rows)
|
(17 rows)
|
||||||
|
|
||||||
|
|
|
@ -20,6 +20,12 @@ VALUES ('http://www.postgresql.org/'),
|
||||||
('foobar'),
|
('foobar'),
|
||||||
('/foobar');
|
('/foobar');
|
||||||
|
|
||||||
|
-- normalization test values from <https://tools.ietf.org/html/rfc3986#section-6.2.2>
|
||||||
|
INSERT INTO test (b)
|
||||||
|
VALUES ('HTTP://www.EXAMPLE.com/'),
|
||||||
|
('http://www.ex%41mple.com/'),
|
||||||
|
('eXAMPLE://a/./b/../b/%63/%7bfoo%7d');
|
||||||
|
|
||||||
SELECT * FROM test;
|
SELECT * FROM test;
|
||||||
|
|
||||||
-- error cases
|
-- error cases
|
||||||
|
@ -28,6 +34,7 @@ SELECT uri 'http://host:port/';
|
||||||
|
|
||||||
\x on
|
\x on
|
||||||
SELECT b AS uri,
|
SELECT b AS uri,
|
||||||
|
uri_normalize(b),
|
||||||
uri_scheme(b),
|
uri_scheme(b),
|
||||||
uri_userinfo(b),
|
uri_userinfo(b),
|
||||||
uri_host(b),
|
uri_host(b),
|
||||||
|
|
29
uri.c
29
uri.c
|
@ -277,6 +277,35 @@ uri_path_array(PG_FUNCTION_ARGS)
|
||||||
PG_RETURN_ARRAYTYPE_P(construct_empty_array(TEXTOID));
|
PG_RETURN_ARRAYTYPE_P(construct_empty_array(TEXTOID));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
PG_FUNCTION_INFO_V1(uri_normalize);
|
||||||
|
Datum
|
||||||
|
uri_normalize(PG_FUNCTION_ARGS)
|
||||||
|
{
|
||||||
|
Datum arg = PG_GETARG_DATUM(0);
|
||||||
|
char *s = TextDatumGetCString(arg);
|
||||||
|
UriUriA uri;
|
||||||
|
int rc;
|
||||||
|
int charsRequired;
|
||||||
|
char *ret;
|
||||||
|
|
||||||
|
parse_uri(s, &uri);
|
||||||
|
|
||||||
|
if ((rc = uriNormalizeSyntaxA(&uri)) != URI_SUCCESS)
|
||||||
|
elog(ERROR, "uriNormalizeSyntaxA() failed: error code %d", rc);
|
||||||
|
|
||||||
|
if ((rc = uriToStringCharsRequiredA(&uri, &charsRequired)) != URI_SUCCESS)
|
||||||
|
elog(ERROR, "uriToStringCharsRequiredA() failed: error code %d", rc);
|
||||||
|
charsRequired++;
|
||||||
|
|
||||||
|
ret = palloc(charsRequired);
|
||||||
|
if ((rc = uriToStringA(ret, &uri, charsRequired, NULL)) != URI_SUCCESS)
|
||||||
|
elog(ERROR, "uriToStringA() failed: error code %d", rc);
|
||||||
|
|
||||||
|
uriFreeUriMembersA(&uri);
|
||||||
|
|
||||||
|
PG_RETURN_URI_P((uritype *) cstring_to_text(ret));
|
||||||
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
cmp_text_range(UriTextRangeA a, UriTextRangeA b)
|
cmp_text_range(UriTextRangeA a, UriTextRangeA b)
|
||||||
{
|
{
|
||||||
|
|
7
uri.sql
7
uri.sql
|
@ -81,6 +81,13 @@ CREATE FUNCTION uri_path_array(uri) RETURNS text[]
|
||||||
AS '$libdir/uri';
|
AS '$libdir/uri';
|
||||||
|
|
||||||
|
|
||||||
|
-- uri_normalize(uri): syntax-based normalization of a URI, returning the
-- normalized value as a new uri.  Implemented by the C function of the same
-- name in the uri shared library.  Declared STRICT, so a null argument
-- yields null without calling the C code.
CREATE FUNCTION uri_normalize(uri) RETURNS uri
|
||||||
|
IMMUTABLE
|
||||||
|
STRICT
|
||||||
|
LANGUAGE C
|
||||||
|
AS '$libdir/uri';
|
||||||
|
|
||||||
|
|
||||||
CREATE FUNCTION uri_lt(uri, uri) RETURNS boolean
|
CREATE FUNCTION uri_lt(uri, uri) RETURNS boolean
|
||||||
IMMUTABLE
|
IMMUTABLE
|
||||||
STRICT
|
STRICT
|
||||||
|
|
Loading…
Reference in New Issue