Fixed merge conflicts

This commit is contained in:
Alejandro Celaya
2022-05-09 08:21:19 +02:00
9 changed files with 110 additions and 25 deletions

View File

@@ -11,8 +11,12 @@ use GuzzleHttp\RequestOptions;
use Psr\Http\Message\ResponseInterface;
use Shlinkio\Shlink\Core\Exception\InvalidUrlException;
use Shlinkio\Shlink\Core\Options\UrlShortenerOptions;
use Throwable;
use function preg_match;
use function str_contains;
use function str_starts_with;
use function strtolower;
use function trim;
use const Shlinkio\Shlink\TITLE_TAG_VALUE;
@@ -36,7 +40,7 @@ class UrlValidator implements UrlValidatorInterface, RequestMethodInterface
return;
}
$this->validateUrlAndGetResponse($url, true);
$this->validateUrlAndGetResponse($url);
}
public function validateUrlWithTitle(string $url, bool $doValidate): ?string
@@ -45,30 +49,55 @@ class UrlValidator implements UrlValidatorInterface, RequestMethodInterface
return null;
}
$response = $this->validateUrlAndGetResponse($url, $doValidate);
if ($response === null || ! $this->options->autoResolveTitles()) {
if (! $this->options->autoResolveTitles()) {
$this->validateUrlAndGetResponse($url, self::METHOD_HEAD);
return null;
}
$body = $response->getBody()->__toString();
preg_match(TITLE_TAG_VALUE, $body, $matches);
$response = $doValidate ? $this->validateUrlAndGetResponse($url) : $this->getResponse($url);
if ($response === null) {
return null;
}
$contentType = strtolower($response->getHeaderLine('Content-Type'));
if (! str_starts_with($contentType, 'text/html')) {
return null;
}
$collectedBody = '';
$body = $response->getBody();
// With streaming enabled, we can walk the body until the </title> tag is found, and then stop
while (! str_contains($collectedBody, '</title>') && ! $body->eof()) {
$collectedBody .= $body->read(1024);
}
preg_match(TITLE_TAG_VALUE, $collectedBody, $matches);
return isset($matches[1]) ? trim($matches[1]) : null;
}
/**
 * Performs an HTTP request against the provided URL and returns the response.
 *
 * Any failure reported by the HTTP client as a GuzzleException is converted into an
 * InvalidUrlException, so a returned response always means the request succeeded.
 *
 * @param self::METHOD_GET|self::METHOD_HEAD $method HTTP method used for the request
 * @throws InvalidUrlException When the HTTP client fails to complete the request
 */
private function validateUrlAndGetResponse(string $url, string $method = self::METHOD_GET): ResponseInterface
{
    try {
        return $this->httpClient->request($method, $url, [
            RequestOptions::ALLOW_REDIRECTS => ['max' => self::MAX_REDIRECTS],
            RequestOptions::IDN_CONVERSION => true,
            // Making the request with a browser's user agent makes the validation closer to a real user
            RequestOptions::HEADERS => ['User-Agent' => self::CHROME_USER_AGENT],
            RequestOptions::STREAM => true, // This ensures large files are not fully downloaded if not needed
        ]);
    } catch (GuzzleException $e) {
        // The original client error is passed along to the exception factory so it is not lost
        throw InvalidUrlException::fromUrl($url, $e);
    }
}
/**
 * Best-effort variant of validateUrlAndGetResponse(): requests the URL with the default method,
 * but yields null instead of propagating any error raised while doing so.
 */
private function getResponse(string $url): ?ResponseInterface
{
    $response = null;

    try {
        $response = $this->validateUrlAndGetResponse($url);
    } catch (Throwable) {
        // Deliberately ignored: callers treat a missing response as "nothing could be resolved"
    }

    return $response;
}