Help language development. Donate to The Perl Foundation

Cro::Core cpan:JNTHN last updated on 2021-11-12

t/uri.t
use Cro::Iri;
use Cro::Uri;
use Test;

sub parses($desc, $uri, *@checks, :$relative, :$ref) {
    my $method = $relative ?? 'parse-relative' !!
                 $ref      ?? 'parse-ref' !!
                              'parse';
    # IRI is a superset of URI, so has to parse everything URI can
    for Cro::Uri, Cro::Iri -> $parser {
        with try $parser."$method"($uri) -> $parsed {
            pass $desc ~ " for $parser.^name()";
            for @checks.kv -> $i, $check {
                ok $check($parsed), "Check { $i + 1 }";
            }
        }
        else {
            diag "$parser.^name() parsing failed: $!";
            flunk $desc;
            skip 'Failed to parse', @checks.elems;
        }
    }
}

sub refuses($desc, $uri) {
    with try Cro::Uri.parse($uri) {
        diag "Incorrectly parsed $uri";
        flunk $desc;
    }
    elsif $! ~~ X::Cro::Uri::ParseError {
        pass $desc;
    }
    else {
        diag "Wrong exception type ($!.^name())";
        flunk $desc;
    }
}

my $long-name = 'abc://username:[email protected]:123/path/data?key=value&key2=value2#fragid1';
is Cro::Uri.parse($long-name).Str, $long-name, '.Str method returns the original string';

parses 'Simple URN',
    'urn:example:animal:ferret:nose',
    *.scheme eq 'urn',
    !*.authority.defined,
    *.path eq 'example:animal:ferret:nose',
    !*.query.defined,
    !*.fragment.defined;

parses 'URN with empty path',
    'wat:',
    *.scheme eq 'wat',
    !*.authority.defined,
    *.path eq '',
    !*.query.defined,
    !*.fragment.defined;

parses 'A URI with all component parts',
    'foo://example.com:8042/over/there?name=ferret#nose',
    *.scheme eq 'foo',
    *.authority eq 'example.com:8042',
    *.path eq '/over/there',
    *.query eq 'name=ferret',
    *.fragment eq 'nose';

parses 'A URI without a query or fragment',
    'foo://example.com:8042/over/there',
    *.scheme eq 'foo',
    *.authority eq 'example.com:8042',
    *.path eq '/over/there',
    !*.query.defined,
    !*.fragment.defined;

parses 'A URI with a query but no fragment',
    'foo://example.com:8042/over/there?name=ferret',
    *.scheme eq 'foo',
    *.authority eq 'example.com:8042',
    *.path eq '/over/there',
    *.query eq 'name=ferret',
    !*.fragment.defined;

parses 'A URI with a fragment but no query',
    'foo://example.com:8042/over/there#nose',
    *.scheme eq 'foo',
    *.authority eq 'example.com:8042',
    *.path eq '/over/there',
    !*.query.defined,
    *.fragment eq 'nose';

parses 'A URI with an empty path',
    'foo://example.com:8042',
    *.scheme eq 'foo',
    *.authority eq 'example.com:8042',
    *.path eq '',
    !*.query.defined,
    !*.fragment.defined;

parses 'A URI with a path of /',
    'foo://example.com:8042/',
    *.scheme eq 'foo',
    *.authority eq 'example.com:8042',
    *.path eq '/',
    !*.query.defined,
    !*.fragment.defined;

parses 'A URI with an empty path and a query',
    'foo://example.com:8042?name=ferret',
    *.scheme eq 'foo',
    *.authority eq 'example.com:8042',
    *.path eq '',
    *.query eq 'name=ferret',
    !*.fragment.defined;

parses 'A URI with an empty path and a fragment',
    'foo://example.com:8042#nose',
    *.scheme eq 'foo',
    *.authority eq 'example.com:8042',
    *.path eq '',
    !*.query.defined,
    *.fragment eq 'nose';

parses 'Empty query and fragment are defined and empty string',
    'foo://example.com:8042?#',
    *.scheme eq 'foo',
    *.authority eq 'example.com:8042',
    *.path eq '',
    *.query.defined,
    *.query eq '',
    *.fragment.defined,
    *.fragment eq '';

refuses 'Something without a : after a scheme', 'foo';

refuses 'Scheme starting with a digit', '1ab:example';
refuses 'Scheme starting with a +', '+ab:example';
refuses 'Scheme starting with a -', '-ab:example';
refuses 'Scheme starting with a .', '.ab:example';
refuses 'Scheme starting with a !', '!ab:example';
refuses 'Scheme starting with a ~', '~ab:example';
refuses 'Scheme starting with a /', '/ab:example';
refuses 'Scheme starting with a :', ':ab:example';

for <a a1 redis+tls some-protocol some.protocol LOUD-CAT x123+45.6-7z> -> $s {
    parses "URN with scheme $s",
        $s ~ ':example:animal:ferret:nose',
        *.scheme eq $s,
        !*.authority.defined,
        *.path eq 'example:animal:ferret:nose',
        !*.query.defined,
        !*.fragment.defined;
}

parses 'Authority parsed into host and port',
    'foo://example.com:8080/',
    *.scheme eq 'foo',
    *.authority eq 'example.com:8080',
    *.host eq 'example.com',
    *.host-class == Cro::ResourceIdentifier::Host::RegName,
    *.port == 8080,
    !*.userinfo.defined;

for <127.0.0.1 8.8.8.8 65.2.137.99 255.255.255.255 0.0.0.0> -> $ipv4 {
    parses "IPv4 host $ipv4",
        'foo://' ~ $ipv4 ~ ':8080/',
        *.scheme eq 'foo',
        *.authority eq "{$ipv4}:8080",
        *.host eq $ipv4,
        *.host-class == Cro::ResourceIdentifier::Host::IPv4,
        *.port == 8080,
        !*.userinfo.defined;
}

for <312.27.1.2 1.2.3.256 af.de.bc.11> -> $not-ipv4 {
    parses "Not-an-IPv4-address $not-ipv4 as a reg-name",
        'foo://' ~ $not-ipv4 ~ ':8080/',
        *.scheme eq 'foo',
        *.authority eq "{$not-ipv4}:8080",
        *.host eq $not-ipv4,
        *.host-class == Cro::ResourceIdentifier::Host::RegName,
        *.port == 8080,
        !*.userinfo.defined;
}

for <1080:0:0:0:8:800:200C:417A FF01:0:0:0:0:0:0:101 0:0:0:0:0:0:0:1
     0:0:0:0:0:0:0:0 1080::8:800:200C:417A FF01::101 ::1 ::
     0:0:0:0:0:0:13.1.68.3 0:0:0:0:0:FFFF:129.144.52.38
     ::FFFF:129.144.52.38> -> $ipv6 {
    parses "IPv6 host $ipv6",
        'foo://[' ~ $ipv6 ~ ']:8080/',
        *.scheme eq 'foo',
        *.authority eq "[{$ipv6}]:8080",
        *.host eq $ipv6,
        *.host-class == Cro::ResourceIdentifier::Host::IPv6,
        *.port == 8080,
        !*.userinfo.defined;
}

for <::OMG 1080::800:200C::417A 0-1 ::FFFF:257.144.52.38> -> $not-ipv6 {
    refuses "Bad IPv6 address $not-ipv6", 'foo://[' ~ $not-ipv6 ~ ']:8080/';
}

for <v8.OMG v7.$!&'()*:;+,= vA2B.X_X-X.~Y vfe.xxx> -> $ipvfuture {
    parses "IPvFuture host $ipvfuture",
        'foo://[' ~ $ipvfuture ~ ']:8080/',
        *.scheme eq 'foo',
        *.authority eq "[{$ipvfuture}]:8080",
        *.host eq $ipvfuture,
        *.host-class == Cro::ResourceIdentifier::Host::IPvFuture,
        *.port == 8080,
        !*.userinfo.defined;
}

for <vX.123 [email protected] v10.x/y/z v.abc> -> $not-ipvfuture {
    refuses "Bad IPvFuture address $not-ipvfuture", 'foo://[' ~ $not-ipvfuture ~ ']:8080/';
}

parses 'Empty host name is allowed in generic URIs',
    'foo://:8080/',
    *.scheme eq 'foo',
    *.authority eq ':8080',
    *.host eq '',
    *.host-class == Cro::ResourceIdentifier::Host::RegName,
    *.port == 8080,
    !*.userinfo.defined;

parses 'Hostname can have unreserved and subdelims',
    Q{foo://B-._a1!$&'()*+,;=~:8080/},
    *.scheme eq 'foo',
    *.authority eq Q{B-._a1!$&'()*+,;=~:8080},
    *.host eq Q{B-._a1!$&'()*+,;=~},
    *.host-class == Cro::ResourceIdentifier::Host::RegName,
    *.port == 8080,
    !*.userinfo.defined;

for <a:b y[ z] %% %zy> -> $bad {
    refuses "Bad reg-name with $bad in it", "foo://a{$bad}c:5000/";
}

parses 'Percent-encoded things in reg-name',
    'foo://%C3%80b.%E3%82%A2%E3%82%A2.com:8080/',
    *.scheme eq 'foo',
    *.authority eq '%C3%80b.%E3%82%A2%E3%82%A2.com:8080',
    *.host eq "\c[LATIN CAPITAL LETTER A WITH GRAVE]b.\c[KATAKANA LETTER A]\c[KATAKANA LETTER A].com",
    *.host-class == Cro::ResourceIdentifier::Host::RegName,
    *.port == 8080,
    !*.userinfo.defined,
    *.path eq '/';

parses 'When no port, port is not defined',
    'foo://example.com/',
    *.scheme eq 'foo',
    *.authority eq 'example.com',
    *.host eq 'example.com',
    *.host-class == Cro::ResourceIdentifier::Host::RegName,
    !*.port.defined,
    !*.userinfo.defined;

parses 'When empty port, port is not defined',
    'foo://example.com:/',
    *.scheme eq 'foo',
    *.authority eq 'example.com:',
    *.host eq 'example.com',
    *.host-class == Cro::ResourceIdentifier::Host::RegName,
    !*.port.defined,
    !*.userinfo.defined;

parses 'Can parse userinfo on a reg-name',
    'ssh://[email protected]',
    *.scheme eq 'ssh',
    *.authority eq '[email protected]',
    *.host eq 'some.secret.host',
    *.host-class == Cro::ResourceIdentifier::Host::RegName,
    !*.port.defined,
    *.userinfo eq 'jnthn',
    *.user eq 'jnthn',
    !*.password.defined;

parses 'Can parse userinfo on an IP address',
    'ssh://[email protected]',
    *.scheme eq 'ssh',
    *.authority eq '[email protected]',
    *.host eq '112.34.56.78',
    *.host-class == Cro::ResourceIdentifier::Host::IPv4,
    !*.port.defined,
    *.userinfo eq 'root',
    *.user eq 'root',
    !*.password.defined;

parses 'We split on the (deprecated, but in the RFC nonetheless, user:pass form)',
    'foo://bob:[email protected]',
    *.scheme eq 'foo',
    *.authority eq 'bob:[email protected]',
    *.host eq 'fbi.gov',
    *.host-class == Cro::ResourceIdentifier::Host::RegName,
    !*.port.defined,
    *.userinfo eq 'bob:s3cr3t',
    *.user eq 'bob',
    *.password eq 's3cr3t';

parses 'We can have unreserved and subdelims in userinfo',
    Q{foo://B-._a1!$&':()*+,;[email protected]:8080/},
    *.scheme eq 'foo',
    *.authority eq Q{B-._a1!$&':()*+,;[email protected]:8080},
    *.host eq 'omg.url',
    *.host-class == Cro::ResourceIdentifier::Host::RegName,
    *.port == 8080,
    *.userinfo eq Q{B-._a1!$&':()*+,;=~},
    *.user eq Q{B-._a1!$&'},
    *.password eq Q{()*+,;=~};

for <y[ z] %% %zy> -> $bad {
    refuses "Bad userinfo with $bad in it", "foo://a{$bad}[email protected]:5000/";
}

parses 'Can decode %-encoded things in userinfo',
    'foo://%C3%80b:%E3%82%A2%E3%82%[email protected]/lol',
    *.scheme eq 'foo',
    *.authority eq '%C3%80b:%E3%82%A2%E3%82%[email protected]',
    *.host eq 'unicode.org',
    *.host-class == Cro::ResourceIdentifier::Host::RegName,
    !*.port.defined,
    *.userinfo eq '%C3%80b:%E3%82%A2%E3%82%A2',
    *.user eq "\c[LATIN CAPITAL LETTER A WITH GRAVE]b",
    *.password eq "\c[KATAKANA LETTER A]\c[KATAKANA LETTER A]";

parses 'Path broken up into segments',
    'foo://example.com/abc/d-e/fg',
    *.scheme eq 'foo',
    *.authority eq 'example.com',
    *.path eq '/abc/d-e/fg',
    *.path-segments.elems == 3,
    *.path-segments[0] eq 'abc',
    *.path-segments[1] eq 'd-e',
    *.path-segments[2] eq 'fg';

parses 'Simple percent escapes in path',
    'foo://example.com/a%20b/%2F%2Fc',
    *.scheme eq 'foo',
    *.authority eq 'example.com',
    *.path eq '/a%20b/%2F%2Fc',
    *.path-segments.elems == 2,
    *.path-segments[0] eq 'a b',
    *.path-segments[1] eq '//c';

parses 'UTF-8 escapes in path',
    'foo://example.com/%C3%80b/%E3%82%A2%E3%82%A2',
    *.scheme eq 'foo',
    *.authority eq 'example.com',
    *.path eq '/%C3%80b/%E3%82%A2%E3%82%A2',
    *.path-segments.elems == 2,
    *.path-segments[0] eq "\c[LATIN CAPITAL LETTER A WITH GRAVE]b",
    *.path-segments[1] eq "\c[KATAKANA LETTER A]\c[KATAKANA LETTER A]";

for qw[- . _ ~ : @ ! $ & ' ( ) * + , ; =] -> $ok {
    parses $ok ~ ' in path', 'foo://localhost/bar/a' ~ $ok ~ '/yes',
        *.scheme eq 'foo',
        *.authority eq 'localhost',
        *.path eq "/bar/a{$ok}/yes",
        !*.query.defined,
        !*.fragment.defined;
}

for qw[- . _ ~ : @ ! $ & ' ( ) * + , ; = / ?] -> $ok {
    parses $ok ~ ' in query', 'foo://localhost/bar?oh' ~ $ok ~ 'yes',
        *.scheme eq 'foo',
        *.authority eq 'localhost',
        *.path eq '/bar',
        *.query eq "oh{$ok}yes",
        !*.fragment.defined;
    parses $ok ~ ' in fragment', 'foo://localhost/bar#oh' ~ $ok ~ 'yes',
        *.scheme eq 'foo',
        *.authority eq 'localhost',
        *.path eq '/bar',
        !*.query.defined,
        *.fragment eq "oh{$ok}yes";
}

parses :relative, 'Relative with authority (1)',
    '//example.org/scheme-relative/URI/with/absolute/path/to/resource.txt.',
    !*.scheme.defined,
    *.authority eq 'example.org',
    *.path eq '/scheme-relative/URI/with/absolute/path/to/resource.txt.',
    !*.query.defined,
    !*.fragment.defined;

parses :relative, 'Relative with authority (2)',
    '//example.org/scheme-relative/URI/with/absolute/path/to/resource',
    !*.scheme.defined,
    *.authority eq 'example.org',
    *.path eq '/scheme-relative/URI/with/absolute/path/to/resource',
    !*.query.defined,
    !*.fragment.defined;

parses :relative, 'Relative with absolute path',
    '/relative/URI/with/absolute/path/to/resource.txt',
    !*.scheme.defined,
    !*.authority.defined,
    *.path eq '/relative/URI/with/absolute/path/to/resource.txt',
    !*.query.defined,
    !*.fragment.defined;

parses :relative, 'Relative with relative path (1)',
    'relative/path/to/resource.txt',
    !*.scheme.defined,
    !*.authority.defined,
    *.path eq 'relative/path/to/resource.txt',
    !*.query.defined,
    !*.fragment.defined;

parses :relative, 'Relative with relative path (2)',
    '../../../resource.txt',
    !*.scheme.defined,
    !*.authority.defined,
    *.path eq '../../../resource.txt',
    !*.query.defined,
    !*.fragment.defined;

parses :relative, 'Relative with relative path (3)',
    'resource.txt',
    !*.scheme.defined,
    !*.authority.defined,
    *.path eq 'resource.txt',
    !*.query.defined,
    !*.fragment.defined;

parses :relative, 'Relative with relative path and fragment',
    './resource.txt#frag01',
    !*.scheme.defined,
    !*.authority.defined,
    *.path eq './resource.txt',
    !*.query.defined,
    *.fragment eq 'frag01';

parses :relative, 'Relative with fragment only',
    '#frag01',
    !*.scheme.defined,
    !*.authority.defined,
    *.path eq '',
    !*.query.defined,
    *.fragment eq 'frag01';

parses :ref, 'An absolute URI when asked to parse a reference',
    'foo://example.com:8042/over/there',
    *.scheme eq 'foo',
    *.authority eq 'example.com:8042',
    *.path eq '/over/there',
    !*.query.defined,
    !*.fragment.defined;

parses :ref, 'A relative URI when asked to parse a reference',
    'relative/path/to/resource.txt',
    !*.scheme.defined,
    !*.authority.defined,
    *.path eq 'relative/path/to/resource.txt',
    !*.query.defined,
    !*.fragment.defined;

parses 'URI with forbidden characters',
    'http://foo.com/user/password?name[%23post_render][0]=printf&name[%23markup]=ABCZ%0A',
    *.scheme eq 'http',
    *.authority eq 'foo.com',
    *.path eq '/user/password',
    *.query eq 'name%5B%23post_render%5D%5B0%5D=printf&name%5B%23markup%5D=ABCZ%0A';

parses :relative, 'URI with forbidden characters, relative',
    '/user/password?name[%23post_render][0]=printf&name[%23markup]=ABCZ%0A',
    !*.scheme.defined,
    !*.authority.defined,
    *.path eq '/user/password',
    *.query eq 'name%5B%23post_render%5D%5B0%5D=printf&name%5B%23markup%5D=ABCZ%0A';

given Cro::Uri.parse('http://a/b/c/d;p?q') -> $base {
    is $base.add("g:h"), "g:h";
    is $base.add("g"), "http://a/b/c/g";
    is $base.add("./g"), "http://a/b/c/g";
    is $base.add("g/"), "http://a/b/c/g/";
    is $base.add("/g"), "http://a/g";
    is $base.add("//g"), "http://g";
    is $base.add("?y") , "http://a/b/c/d;p?y";
    is $base.add("g?y"), "http://a/b/c/g?y";
    is $base.add("#s"), "http://a/b/c/d;p?q#s";
    is $base.add("g#s"), "http://a/b/c/g#s";
    is $base.add("g?y#s"), "http://a/b/c/g?y#s";
    is $base.add(";x"), "http://a/b/c/;x";
    is $base.add("g;x"), "http://a/b/c/g;x";
    is $base.add("g;x?y#s"), "http://a/b/c/g;x?y#s";
    is $base.add(""), "http://a/b/c/d;p?q";
    is $base.add("."), "http://a/b/c/";
    is $base.add("./"), "http://a/b/c/";
    is $base.add(".."), "http://a/b/";
    is $base.add("../"), "http://a/b/";
    is $base.add("../g"), "http://a/b/g";
    is $base.add("../.."), "http://a/";
    is $base.add("../../"), "http://a/";
    is $base.add("../../g"), "http://a/g";
    is $base.add("../../../g"), "http://a/g";
    is $base.add("../../../../g"), "http://a/g";
    is $base.add("/./g"),  "http://a/g";
    is $base.add("/../g"),  "http://a/g";
    is $base.add("g."), "http://a/b/c/g.";
    is $base.add(".g"), "http://a/b/c/.g";
    is $base.add("g..") , "http://a/b/c/g..";
    is $base.add("..g"), "http://a/b/c/..g";
    is $base.add("./../g"), "http://a/b/g";
    is $base.add("./g/."), "http://a/b/c/g/";
    is $base.add("g/./h"), "http://a/b/c/g/h";
    is $base.add("g/../h"), "http://a/b/c/h";
    is $base.add("g;x=1/./y"), "http://a/b/c/g;x=1/y";
    is $base.add("g;x=1/../y"), "http://a/b/c/y";
    is $base.add("g?y/./x"), "http://a/b/c/g?y/./x";
    is $base.add("g?y/../x"), "http://a/b/c/g?y/../x";
    is $base.add("g#s/./x"), "http://a/b/c/g#s/./x";
    is $base.add("g#s/../x"), "http://a/b/c/g#s/../x";
    is $base.add("http:g"), "http:g";
    is $base.add("/"), "http://a/";
}

given Cro::Uri.parse('foo://bob:[email protected]:4242/a').add('b') {
    is .scheme, 'foo', 'Correct scheme after relative resolution';
    is .authority, 'bob:[email protected]:4242', 'Correct authority after relative resolution';
    is .host, 'fbi.gov', 'Correct host after relative resolution';
    is .host-class, Cro::ResourceIdentifier::Host::RegName, 'Correct host class after relative resolution';
    is .port, 4242, 'Correct port after relative resolution';
    is .userinfo, 'bob:s3cr3t', 'Correct user info after relative resolution';
    is .user, 'bob', 'Correct user after relative resolution';
    is .password, 's3cr3t', 'Correct password after relative resolution';
}

for <http://www.example.com/{term:1}/{term}/{test*}/foo{?query,number}
     http://www.example.com/v1/company/>.kv -> $i, $v {
    ok Cro::Uri::URI-Template.parse($v), "Regex $i for URI Template passed";
}

{
    use Cro::Uri :decode-percents;
    is decode-percents('abcd1234'), 'abcd1234', 'decode-percents with nothing to decode works';
    is decode-percents('a%24%3Fb%21%2F3%5C45%3A6'), 'a$?b!/3\45:6', 'decode-percents in ASCII range works';
    is decode-percents('p%C5%99ib%C4%9Bh'), 'přiběh', 'decode-percents decodes non-ASCII as UTF-8 octets';
}

{
    use Cro::Uri :encode-percents;
    is encode-percents('abcd1234'), 'abcd1234', 'encode-percents with nothing to encode works';
    is encode-percents('a$?b!/3\45:6'), 'a%24%3Fb%21%2F3%5C45%3A6', 'encode-percents in ASCII range works';
    is encode-percents('přiběh'), 'p%C5%99ib%C4%9Bh', 'encode-percents encodes non-ASCII as UTF-8 octets';
    is encode-percents("\n"), '%0A', 'encode-percents pads a 0';
}

{
    my class FooUri is Cro::Uri {}
    isa-ok FooUri.parse($long-name), FooUri, '.parse respects subclassing';
    isa-ok FooUri.parse-ref($long-name), FooUri, '.parse-ref respects subclassing';
    isa-ok FooUri.parse-relative('/foo'), FooUri, '.parse-relative respects subclassing';
}

done-testing;