mirror of
https://github.com/OpenXE-org/OpenXE.git
synced 2025-01-01 01:20:29 +01:00
374 lines
10 KiB
PHP
374 lines
10 KiB
PHP
<?php
|
|
|
|
namespace Sabre\Uri;
|
|
|
|
/**
|
|
* This file contains all the uri handling functions.
|
|
*
|
|
* @copyright Copyright (C) fruux GmbH (https://fruux.com/)
|
|
* @author Evert Pot (http://evertpot.com/)
|
|
* @license http://sabre.io/license/
|
|
*/
|
|
|
|
/**
 * Resolves relative urls, like a browser would.
 *
 * This function takes a basePath, which itself _may_ also be relative, and
 * then applies the relative path on top of it.
 *
 * A component counts as "present" when it is neither null nor the empty
 * string, so a reference such as "0" (or a "?0" query / "#0" fragment) is
 * honoured instead of being mistaken for a missing component.
 *
 * @param string $basePath
 * @param string $newPath
 * @return string
 */
function resolve($basePath, $newPath) {

    $base = parse($basePath);
    $delta = parse($newPath);

    // Truthiness checks would wrongly discard the string "0".
    $hasValue = function($value) {
        return $value !== null && $value !== '';
    };

    // Prefers the component of the new uri and falls back to the base uri.
    // NOTE(review): this also inherits the base port when the reference
    // supplies its own host; kept as-is to preserve existing behaviour.
    $pick = function($part) use ($base, $delta, $hasValue) {

        if ($hasValue($delta[$part])) {
            return $delta[$part];
        } elseif ($hasValue($base[$part])) {
            return $base[$part];
        }
        return null;

    };

    // If the new path defines a scheme, it's absolute and we can just return
    // that.
    if ($hasValue($delta['scheme'])) {
        return build($delta);
    }

    $newParts = [];

    $newParts['scheme'] = $pick('scheme');
    $newParts['host'] = $pick('host');
    $newParts['port'] = $pick('port');

    $path = '';
    if ($hasValue($delta['path'])) {
        // If the path starts with a slash
        if ($delta['path'][0] === '/') {
            $path = $delta['path'];
        } else {
            // Removing last component from base path.
            $path = (string)$base['path'];
            $lastSlash = strrpos($path, '/');
            if ($lastSlash !== false) {
                $path = substr($path, 0, $lastSlash);
            }
            $path .= '/' . $delta['path'];
        }
    } else {
        $path = $hasValue($base['path']) ? $base['path'] : '/';
    }

    // Remembered so that '..' segments cannot climb above the root below.
    $wasAbsolute = strpos($path, '/') === 0;

    // Removing .. and .
    $pathParts = explode('/', $path);
    $newPathParts = [];
    foreach ($pathParts as $pathPart) {

        switch ($pathPart) {
            case '.' :
                break;
            case '..' :
                array_pop($newPathParts);
                break;
            default :
                $newPathParts[] = $pathPart;
                break;
        }
    }

    $path = implode('/', $newPathParts);

    // Too many '..' segments pop the empty root segment as well. Without the
    // leading slash build() would glue the path directly onto the host
    // (http://example.org + bar => http://example.orgbar).
    if ($wasAbsolute && strpos($path, '/') !== 0) {
        $path = '/' . $path;
    }

    $newParts['path'] = $path;
    if ($hasValue($delta['query'])) {
        $newParts['query'] = $delta['query'];
    } elseif ($hasValue($base['query']) && !$hasValue($delta['host']) && !$hasValue($delta['path'])) {
        // Keep the old query if host and path didn't change
        $newParts['query'] = $base['query'];
    }
    if ($hasValue($delta['fragment'])) {
        $newParts['fragment'] = $delta['fragment'];
    }
    return build($newParts);

}
|
|
|
|
/**
 * Brings a URI (or partial URI) into a canonical form.
 *
 * Two normalized URIs can be compared as plain strings. Among other things
 * this turns %7E into a tilde (rfc3986) and upper-cases percent-encoded
 * triplets such as %3a into %3A.
 *
 * @param string $uri
 * @return string
 */
function normalize($uri) {

    $parts = parse($uri);

    if (!empty($parts['path'])) {
        $cleanSegments = [];
        foreach (explode('/', ltrim($parts['path'], '/')) as $segment) {
            if ($segment === '.') {
                // Current directory: nothing to add.
                continue;
            }
            if ($segment === '..') {
                // One level up in the hierarchy.
                array_pop($cleanSegments);
                continue;
            }
            // Decode and re-encode so every segment is encoded consistently.
            $cleanSegments[] = rawurlencode(rawurldecode($segment));
        }
        $parts['path'] = '/' . implode('/', $cleanSegments);
    }

    if ($parts['scheme']) {
        $scheme = strtolower($parts['scheme']);
        $parts['scheme'] = $scheme;

        $wellKnownPorts = [
            'http'  => '80',
            'https' => '443',
        ];

        $usesDefaultPort = !empty($parts['port'])
            && isset($wellKnownPorts[$scheme])
            && $wellKnownPorts[$scheme] == $parts['port'];
        if ($usesDefaultPort) {
            // The default port carries no information, drop it.
            unset($parts['port']);
        }

        // In http(s) an empty path is equivalent to /.
        if (($scheme === 'http' || $scheme === 'https') && empty($parts['path'])) {
            $parts['path'] = '/';
        }
    }

    if ($parts['host']) {
        $parts['host'] = strtolower($parts['host']);
    }

    return build($parts);

}
|
|
|
|
/**
 * Parses a URI and returns its individual components.
 *
 * This method largely behaves the same as PHP's parse_url, except that it will
 * return an array with all the array keys, including the ones that are not
 * set by parse_url, which makes it a bit easier to work with.
 *
 * Unlike PHP's parse_url, it will also convert any non-ascii characters to
 * percent-encoded strings. PHP's parse_url corrupts these characters on OS X.
 *
 * The encoding step works on bytes, so input that is not valid UTF-8 is
 * percent-encoded byte by byte instead of being discarded.
 *
 * @param string $uri
 * @return array
 */
function parse($uri) {

    // Normally a URI must be ASCII. However, often it's not and parse_url
    // might corrupt these strings.
    //
    // For that reason we take any non-ascii bytes from the uri and
    // uriencode them first. Matching bytes rather than UTF-8 characters
    // yields the same output for valid UTF-8, and avoids
    // preg_replace_callback returning null for malformed input.
    $uri = preg_replace_callback(
        '/[\x80-\xFF]/',
        function($matches) {
            return rawurlencode($matches[0]);
        },
        $uri
    );

    $result = parse_url($uri);
    if (!$result) {
        $result = _parse_fallback($uri);
    }

    // Guarantee that every key exists, keeping whatever was parsed.
    return
        $result + [
            'scheme'   => null,
            'host'     => null,
            'path'     => null,
            'port'     => null,
            'user'     => null,
            'query'    => null,
            'fragment' => null,
        ];

}
|
|
|
|
/**
 * This function takes the components returned from PHP's parse_url, and uses
 * it to generate a new uri.
 *
 * A component is emitted when it is set and is not the empty string. The
 * string "0" (and port 0) therefore survives, whereas null / '' components
 * are skipped. The 'pass' component is not used.
 *
 * @param array $parts
 * @return string
 */
function build(array $parts) {

    // empty() would also reject "0", which is a legal path, query, fragment
    // or host.
    $has = function($key) use ($parts) {
        return isset($parts[$key]) && (string)$parts[$key] !== '';
    };

    $uri = '';

    $authority = '';
    if ($has('host')) {
        $authority = $parts['host'];
        if ($has('user')) {
            $authority = $parts['user'] . '@' . $authority;
        }
        if ($has('port')) {
            $authority = $authority . ':' . $parts['port'];
        }
    }

    if ($has('scheme')) {
        $uri = $parts['scheme'] . ':';
    }

    // The '//' marker is written when there is an authority, and always for
    // file: uris so that file:///path keeps its triple slash.
    if ($authority !== '' || ($has('scheme') && $parts['scheme'] === 'file')) {
        $uri .= '//' . $authority;
    }

    if ($has('path')) {
        $uri .= $parts['path'];
    }
    if ($has('query')) {
        $uri .= '?' . $parts['query'];
    }
    if ($has('fragment')) {
        $uri .= '#' . $parts['fragment'];
    }

    return $uri;

}
|
|
|
|
/**
 * Splits a path into its 'dirname' and 'basename' parts.
 *
 * PHP's own basename() depends on the active locale (C versus a UTF-8
 * locale), while this function always works on UTF-8 characters.
 *
 * basename() and dirname() are also platform aware and treat a backslash (\)
 * as a directory separator on windows; this function only splits on /.
 *
 * The two parts are returned as a 2-element array. When the path has no
 * dirname the first element is an empty string. A single trailing / is
 * ignored. If the path cannot be split at all, both elements are null.
 *
 * @param string $path
 * @return array
 */
function split($path) {

    $found = [];
    $isMatch = preg_match('/^(?:(?:(.*)(?:\/+))?([^\/]+))(?:\/?)$/u', $path, $found);

    if (!$isMatch) {
        return [null, null];
    }

    // Index 0 is the full match; 1 and 2 are dirname and basename.
    list(, $dirname, $basename) = $found;

    return [$dirname, $basename];

}
|
|
|
|
/**
 * This function is another implementation of parse_url, except this one is
 * fully written in PHP.
 *
 * It exists as a workaround for inputs on which PHP's own parse_url fails.
 *
 * This function is only called if the main parse method fails. It's pretty
 * crude and probably slow, so the original parse_url is usually preferred.
 *
 * The returned array always contains the keys scheme, host, port, user,
 * path, fragment and query (null when absent); 'pass' is only added when a
 * password was found.
 *
 * @param string $uri
 * @return array
 * @throws InvalidUriException if the authority part cannot be parsed
 */
function _parse_fallback($uri) {

    // Normally a URI must be ASCII. However, often it's not, so any
    // non-ascii bytes are uriencoded first. Matching on bytes keeps this
    // working for input that is not valid UTF-8.
    $uri = preg_replace_callback(
        '/[\x80-\xFF]/',
        function($matches) {
            return rawurlencode($matches[0]);
        },
        $uri
    );

    $result = [
        'scheme'   => null,
        'host'     => null,
        'port'     => null,
        'user'     => null,
        'path'     => null,
        'fragment' => null,
        'query'    => null,
    ];

    // The dash is escaped: an unescaped "+-." is a range that also
    // accepts a comma.
    if (preg_match('% ^([A-Za-z][A-Za-z0-9+.\-]+): %x', $uri, $matches)) {

        $result['scheme'] = $matches[1];
        // Take what's left. The cast guards against substr() returning
        // false when nothing follows the colon.
        $uri = (string)substr($uri, strlen($result['scheme']) + 1);

    }

    // Taking off a fragment part
    if (strpos($uri, '#') !== false) {
        list($uri, $result['fragment']) = explode('#', $uri, 2);
    }
    // Taking off the query part
    if (strpos($uri, '?') !== false) {
        list($uri, $result['query']) = explode('?', $uri, 2);
    }

    if (substr($uri, 0, 3) === '///') {
        // The triple slash uris are a bit unusual, but we have special handling
        // for them.
        $result['path'] = substr($uri, 2);
        $result['host'] = '';
    } elseif (substr($uri, 0, 2) === '//') {
        // Uris that have an authority part.
        $regex = '
            %^
            //
            (?: (?<user> [^:@]+) (: (?<pass> [^@]+)) @)?
            (?<host> ( [^:/]* | \[ [^\]]+ \] ))
            (?: : (?<port> [0-9]+))?
            (?<path> / .*)?
            $%x
        ';
        if (!preg_match($regex, $uri, $matches)) {
            throw new InvalidUriException('Invalid, or could not parse URI');
        }

        // preg_match leaves trailing groups that did not take part in the
        // match out of $matches, and reports earlier ones as ''. Both cases
        // mean "absent".
        $captured = function($name) use ($matches) {
            return isset($matches[$name]) && $matches[$name] !== '';
        };

        if ($captured('host')) $result['host'] = $matches['host'];
        if ($captured('port')) $result['port'] = (int)$matches['port'];
        if ($captured('path')) $result['path'] = $matches['path'];
        if ($captured('user')) $result['user'] = $matches['user'];
        if ($captured('pass')) $result['pass'] = $matches['pass'];
    } else {
        $result['path'] = $uri;
    }

    return $result;

}
|