diff --git a/README.md b/README.md index 45f71d7..5ec7e07 100644 --- a/README.md +++ b/README.md @@ -88,6 +88,19 @@ A number of functions are provided to extract parts of a URI: Extracts the port of a URI as an integer, for example `5432`. If no port is specified, the return value is null. +- `uri_path(uri) returns text` + + Extracts the path component of a URI. Logically, a URI always + contains a path. The return value can be an empty string but + never null. + +- `uri_path_array(uri) returns text[]` + + Returns the path component of a URI as an array, with the path + split at the slash characters. This is probably not as useful as + the `uri_path` function, but it is provided here because the + `uriparser` library exposes it. + - `uri_query(uri) returns text` Extracts the query part of a URI (roughly speaking, everything diff --git a/test/expected/test.out b/test/expected/test.out index d900472..26a7588 100644 --- a/test/expected/test.out +++ b/test/expected/test.out @@ -13,7 +13,9 @@ VALUES ('http://www.postgresql.org/'), ('http://[1080::8:800:200C:417A]/foo'), ('http://host:'), (''), - ('foobar'); + ('/'), + ('foobar'), + ('/foobar'); SELECT * FROM test; a | b ----+----------------------------------------------------------------------------------------- @@ -28,8 +30,10 @@ SELECT * FROM test; 9 | http://[1080::8:800:200C:417A]/foo 10 | http://host: 11 | - 12 | foobar -(12 rows) + 12 | / + 13 | foobar + 14 | /foobar +(14 rows) -- error cases SELECT uri 'http://host:port/'; @@ -44,135 +48,172 @@ SELECT b AS uri, uri_host_inet(b), uri_port(b), uri_path(b), + uri_path_array(b), uri_query(b), uri_fragment(b) FROM test; --[ RECORD 1 ]-+---------------------------------------------------------------------------------------- -uri | http://www.postgresql.org/ -uri_scheme | http -uri_userinfo | _null_ -uri_host | www.postgresql.org -uri_host_inet | _null_ -uri_port | _null_ -uri_path | {""} -uri_query | _null_ -uri_fragment | _null_ --[ RECORD 2 ]-+---------------------------------------------------------------------------------------- -uri | http://www.postgresql.org/docs/devel/static/xfunc-sql.html#XFUNC-SQL-FUNCTION-ARGUMENTS -uri_scheme | http -uri_userinfo | _null_ -uri_host | www.postgresql.org -uri_host_inet | _null_ -uri_port | _null_ -uri_path | {docs,devel,static,xfunc-sql.html} -uri_query | _null_ -uri_fragment | XFUNC-SQL-FUNCTION-ARGUMENTS --[ RECORD 3 ]-+---------------------------------------------------------------------------------------- -uri | https://duckduckgo.com/?q=postgresql&ia=about -uri_scheme | https -uri_userinfo | _null_ -uri_host | duckduckgo.com -uri_host_inet | _null_ -uri_port | _null_ -uri_path | {""} -uri_query | q=postgresql&ia=about -uri_fragment | _null_ --[ RECORD 4 ]-+---------------------------------------------------------------------------------------- -uri | ftp://ftp.gnu.org/gnu/bison -uri_scheme | ftp -uri_userinfo | _null_ -uri_host | ftp.gnu.org -uri_host_inet | _null_ -uri_port | _null_ -uri_path | {gnu,bison} -uri_query | _null_ -uri_fragment | _null_ --[ RECORD 5 ]-+---------------------------------------------------------------------------------------- -uri | mailto:foo@example.com -uri_scheme | mailto -uri_userinfo | _null_ -uri_host | _null_ -uri_host_inet | _null_ -uri_port | _null_ -uri_path | {foo@example.com} -uri_query | _null_ -uri_fragment | _null_ --[ RECORD 6 ]-+---------------------------------------------------------------------------------------- -uri | ssh://username@review.openstack.org:29418/openstack/nova.git -uri_scheme | ssh -uri_userinfo | username -uri_host | review.openstack.org -uri_host_inet | _null_ -uri_port | 29418 -uri_path | {openstack,nova.git} -uri_query | _null_ -uri_fragment | _null_ --[ RECORD 7 ]-+---------------------------------------------------------------------------------------- -uri | http://admin:password@192.168.0.1 -uri_scheme | http -uri_userinfo | admin:password -uri_host | 192.168.0.1 -uri_host_inet | 192.168.0.1 -uri_port | _null_ -uri_path | {} -uri_query | _null_ -uri_fragment | _null_ --[ RECORD 8 ]-+---------------------------------------------------------------------------------------- -uri | http://[FEDC:BA98:7654:3210:FEDC:BA98:7654:3210]:80/index.html -uri_scheme | http -uri_userinfo | _null_ -uri_host | FEDC:BA98:7654:3210:FEDC:BA98:7654:3210 -uri_host_inet | fedc:ba98:7654:3210:fedc:ba98:7654:3210 -uri_port | 80 -uri_path | {index.html} -uri_query | _null_ -uri_fragment | _null_ --[ RECORD 9 ]-+---------------------------------------------------------------------------------------- -uri | http://[1080::8:800:200C:417A]/foo -uri_scheme | http -uri_userinfo | _null_ -uri_host | 1080::8:800:200C:417A -uri_host_inet | 1080::8:800:200c:417a -uri_port | _null_ -uri_path | {foo} -uri_query | _null_ -uri_fragment | _null_ --[ RECORD 10 ]+---------------------------------------------------------------------------------------- -uri | http://host: -uri_scheme | http -uri_userinfo | _null_ -uri_host | host -uri_host_inet | _null_ -uri_port | _null_ -uri_path | {} -uri_query | _null_ -uri_fragment | _null_ --[ RECORD 11 ]+---------------------------------------------------------------------------------------- -uri | -uri_scheme | _null_ -uri_userinfo | _null_ -uri_host | _null_ -uri_host_inet | _null_ -uri_port | _null_ -uri_path | {} -uri_query | _null_ -uri_fragment | _null_ --[ RECORD 12 ]+---------------------------------------------------------------------------------------- -uri | foobar -uri_scheme | _null_ -uri_userinfo | _null_ -uri_host | _null_ -uri_host_inet | _null_ -uri_port | _null_ -uri_path | {foobar} -uri_query | _null_ -uri_fragment | _null_ +-[ RECORD 1 ]--+---------------------------------------------------------------------------------------- +uri | http://www.postgresql.org/ +uri_scheme | http +uri_userinfo | _null_ +uri_host | www.postgresql.org +uri_host_inet | _null_ +uri_port | _null_ +uri_path | / +uri_path_array | {""} +uri_query | _null_ +uri_fragment | _null_ +-[ RECORD 2 ]--+---------------------------------------------------------------------------------------- +uri | http://www.postgresql.org/docs/devel/static/xfunc-sql.html#XFUNC-SQL-FUNCTION-ARGUMENTS +uri_scheme | http +uri_userinfo | _null_ +uri_host | www.postgresql.org +uri_host_inet | _null_ +uri_port | _null_ +uri_path | /docs/devel/static/xfunc-sql.html +uri_path_array | {docs,devel,static,xfunc-sql.html} +uri_query | _null_ +uri_fragment | XFUNC-SQL-FUNCTION-ARGUMENTS +-[ RECORD 3 ]--+---------------------------------------------------------------------------------------- +uri | https://duckduckgo.com/?q=postgresql&ia=about +uri_scheme | https +uri_userinfo | _null_ +uri_host | duckduckgo.com +uri_host_inet | _null_ +uri_port | _null_ +uri_path | / +uri_path_array | {""} +uri_query | q=postgresql&ia=about +uri_fragment | _null_ +-[ RECORD 4 ]--+---------------------------------------------------------------------------------------- +uri | ftp://ftp.gnu.org/gnu/bison +uri_scheme | ftp +uri_userinfo | _null_ +uri_host | ftp.gnu.org +uri_host_inet | _null_ +uri_port | _null_ +uri_path | /gnu/bison +uri_path_array | {gnu,bison} +uri_query | _null_ +uri_fragment | _null_ +-[ RECORD 5 ]--+---------------------------------------------------------------------------------------- +uri | mailto:foo@example.com +uri_scheme | mailto +uri_userinfo | _null_ +uri_host | _null_ +uri_host_inet | _null_ +uri_port | _null_ +uri_path | foo@example.com +uri_path_array | {foo@example.com} +uri_query | _null_ +uri_fragment | _null_ +-[ RECORD 6 ]--+---------------------------------------------------------------------------------------- +uri | ssh://username@review.openstack.org:29418/openstack/nova.git +uri_scheme | ssh +uri_userinfo | username +uri_host | review.openstack.org +uri_host_inet | _null_ +uri_port | 29418 +uri_path | /openstack/nova.git +uri_path_array | {openstack,nova.git} +uri_query | _null_ +uri_fragment | _null_ +-[ RECORD 7 ]--+---------------------------------------------------------------------------------------- +uri | http://admin:password@192.168.0.1 +uri_scheme | http +uri_userinfo | admin:password +uri_host | 192.168.0.1 +uri_host_inet | 192.168.0.1 +uri_port | _null_ +uri_path | +uri_path_array | {} +uri_query | _null_ +uri_fragment | _null_ +-[ RECORD 8 ]--+---------------------------------------------------------------------------------------- +uri | http://[FEDC:BA98:7654:3210:FEDC:BA98:7654:3210]:80/index.html +uri_scheme | http +uri_userinfo | _null_ +uri_host | FEDC:BA98:7654:3210:FEDC:BA98:7654:3210 +uri_host_inet | fedc:ba98:7654:3210:fedc:ba98:7654:3210 +uri_port | 80 +uri_path | /index.html +uri_path_array | {index.html} +uri_query | _null_ +uri_fragment | _null_ +-[ RECORD 9 ]--+---------------------------------------------------------------------------------------- +uri | http://[1080::8:800:200C:417A]/foo +uri_scheme | http +uri_userinfo | _null_ +uri_host | 1080::8:800:200C:417A +uri_host_inet | 1080::8:800:200c:417a +uri_port | _null_ +uri_path | /foo +uri_path_array | {foo} +uri_query | _null_ +uri_fragment | _null_ +-[ RECORD 10 ]-+---------------------------------------------------------------------------------------- +uri | http://host: +uri_scheme | http +uri_userinfo | _null_ +uri_host | host +uri_host_inet | _null_ +uri_port | _null_ +uri_path | +uri_path_array | {} +uri_query | _null_ +uri_fragment | _null_ +-[ RECORD 11 ]-+---------------------------------------------------------------------------------------- +uri | +uri_scheme | _null_ +uri_userinfo | _null_ +uri_host | _null_ +uri_host_inet | _null_ +uri_port | _null_ +uri_path | +uri_path_array | {} +uri_query | _null_ +uri_fragment | _null_ +-[ RECORD 12 ]-+---------------------------------------------------------------------------------------- +uri | / +uri_scheme | _null_ +uri_userinfo | _null_ +uri_host | _null_ +uri_host_inet | _null_ +uri_port | _null_ +uri_path | / +uri_path_array | {} +uri_query | _null_ +uri_fragment | _null_ +-[ RECORD 13 ]-+---------------------------------------------------------------------------------------- +uri | foobar +uri_scheme | _null_ +uri_userinfo | _null_ +uri_host | _null_ +uri_host_inet | _null_ +uri_port | _null_ +uri_path | foobar +uri_path_array | {foobar} +uri_query | _null_ +uri_fragment | _null_ +-[ RECORD 14 ]-+---------------------------------------------------------------------------------------- +uri | /foobar +uri_scheme | _null_ +uri_userinfo | _null_ +uri_host | _null_ +uri_host_inet | _null_ +uri_port | _null_ +uri_path | /foobar +uri_path_array | {foobar} +uri_query | _null_ +uri_fragment | _null_ \x off SELECT DISTINCT b FROM test ORDER BY b; b ----------------------------------------------------------------------------------------- + / + /foobar foobar ftp://ftp.gnu.org/gnu/bison http://[1080::8:800:200C:417A]/foo @@ -184,5 +225,5 @@ SELECT DISTINCT b FROM test ORDER BY b; https://duckduckgo.com/?q=postgresql&ia=about mailto:foo@example.com ssh://username@review.openstack.org:29418/openstack/nova.git -(12 rows) +(14 rows) diff --git a/test/sql/test.sql b/test/sql/test.sql index 09f2495..67cf1f4 100644 --- a/test/sql/test.sql +++ b/test/sql/test.sql @@ -16,7 +16,9 @@ VALUES ('http://www.postgresql.org/'), ('http://[1080::8:800:200C:417A]/foo'), ('http://host:'), (''), - ('foobar'); + ('/'), + ('foobar'), + ('/foobar'); SELECT * FROM test; @@ -32,6 +34,7 @@ SELECT b AS uri, uri_host_inet(b), uri_port(b), uri_path(b), + uri_path_array(b), uri_query(b), uri_fragment(b) FROM test; diff --git a/uri.c b/uri.c index 2e923ba..5c7901c 100644 --- a/uri.c +++ b/uri.c @@ -1,6 +1,7 @@ #include #include #include +#include #include #include #include @@ -223,6 +224,34 @@ uri_fragment(PG_FUNCTION_ARGS) PG_FUNCTION_INFO_V1(uri_path); Datum uri_path(PG_FUNCTION_ARGS) +{ + Datum arg = PG_GETARG_DATUM(0); + char *s = TextDatumGetCString(arg); + UriUriA uri; + StringInfoData buf; + UriPathSegmentA *p; + + initStringInfo(&buf); + + parse_uri(s, &uri); + + if (uri.absolutePath || (uriIsHostSetA(&uri) && uri.pathHead)) + appendStringInfoChar(&buf, '/'); + + for (p = uri.pathHead; p; p = p->next) + { + appendBinaryStringInfo(&buf, p->text.first, p->text.afterLast - p->text.first); + if (p->next) + appendStringInfoChar(&buf, '/'); + } + + uriFreeUriMembersA(&uri); + PG_RETURN_TEXT_P(cstring_to_text(buf.data)); +} + +PG_FUNCTION_INFO_V1(uri_path_array); +Datum +uri_path_array(PG_FUNCTION_ARGS) { Datum arg = PG_GETARG_DATUM(0); char *s = TextDatumGetCString(arg); diff --git a/uri.sql b/uri.sql index 75ac607..94be6c6 100644 --- a/uri.sql +++ b/uri.sql @@ -68,7 +68,13 @@ CREATE FUNCTION uri_fragment(uri) RETURNS text LANGUAGE C AS '$libdir/uri'; -CREATE FUNCTION uri_path(uri) RETURNS text[] +CREATE FUNCTION uri_path(uri) RETURNS text + IMMUTABLE + STRICT + LANGUAGE C + AS '$libdir/uri'; + +CREATE FUNCTION uri_path_array(uri) RETURNS text[] IMMUTABLE STRICT LANGUAGE C