Remove support for validating long URLs during short URL creation/edition

This commit is contained in:
Alejandro Celaya
2024-02-17 12:02:57 +01:00
parent 5c1ab02753
commit e3de403c6c
28 changed files with 198 additions and 619 deletions

View File

@@ -1,35 +0,0 @@
<?php
declare(strict_types=1);
namespace Shlinkio\Shlink\Core\Exception;
use Fig\Http\Message\StatusCodeInterface;
use Mezzio\ProblemDetails\Exception\CommonProblemDetailsExceptionTrait;
use Mezzio\ProblemDetails\Exception\ProblemDetailsExceptionInterface;
use Throwable;
use function Shlinkio\Shlink\Core\toProblemDetailsType;
use function sprintf;
/**
 * Problem-details exception reported when a provided long URL is considered invalid.
 *
 * Instances are meant to be created through the fromUrl() named constructor, which fills in all the
 * problem-details fields (detail, title, type, status and the offending URL).
 */
class InvalidUrlException extends DomainException implements ProblemDetailsExceptionInterface
{
    use CommonProblemDetailsExceptionTrait;

    private const TITLE = 'Invalid URL';
    public const ERROR_CODE = 'invalid-url';

    /**
     * Builds the exception for the URL that failed validation.
     *
     * @param string $url The URL that was rejected. It is included in the message and exposed under "url"
     * @param Throwable|null $previous The error which caused the URL to be rejected, if any
     */
    public static function fromUrl(string $url, ?Throwable $previous = null): self
    {
        $message = sprintf('Provided URL %s is invalid. Try with a different one.', $url);
        $exception = new self($message, StatusCodeInterface::STATUS_BAD_REQUEST, $previous);

        // Problem-details fields
        $exception->detail = $exception->getMessage();
        $exception->title = self::TITLE;
        $exception->type = toProblemDetailsType(self::ERROR_CODE);
        $exception->status = StatusCodeInterface::STATUS_BAD_REQUEST;
        $exception->additional = ['url' => $url];

        return $exception;
    }
}

View File

@@ -120,7 +120,6 @@ class ShortUrl extends AbstractEntity
?ShortUrlRelationResolverInterface $relationResolver = null,
): self {
$meta = [
ShortUrlInputFilter::VALIDATE_URL => false,
ShortUrlInputFilter::LONG_URL => $url->longUrl,
ShortUrlInputFilter::DOMAIN => $url->domain,
ShortUrlInputFilter::TAGS => $url->tags,

View File

@@ -4,31 +4,90 @@ declare(strict_types=1);
namespace Shlinkio\Shlink\Core\ShortUrl\Helper;
use Shlinkio\Shlink\Core\Exception\InvalidUrlException;
use Shlinkio\Shlink\Core\Util\UrlValidatorInterface;
use Fig\Http\Message\RequestMethodInterface;
use GuzzleHttp\ClientInterface;
use GuzzleHttp\RequestOptions;
use Psr\Http\Message\ResponseInterface;
use Shlinkio\Shlink\Core\Options\UrlShortenerOptions;
use Throwable;
class ShortUrlTitleResolutionHelper implements ShortUrlTitleResolutionHelperInterface
use function html_entity_decode;
use function preg_match;
use function str_contains;
use function str_starts_with;
use function strtolower;
use function trim;
use const Shlinkio\Shlink\TITLE_TAG_VALUE;
readonly class ShortUrlTitleResolutionHelper implements ShortUrlTitleResolutionHelperInterface
{
public function __construct(private readonly UrlValidatorInterface $urlValidator)
{
private const MAX_REDIRECTS = 15;
private const CHROME_USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) '
. 'Chrome/121.0.0.0 Safari/537.36';
public function __construct(
private ClientInterface $httpClient,
private UrlShortenerOptions $options,
) {
}
/**
* @deprecated TODO Rename to processTitle once URL validation is removed with Shlink 4.0.0
* Move relevant logic from URL validator here.
* @template T of TitleResolutionModelInterface
* @param T $data
* @return T
* @throws InvalidUrlException
*/
public function processTitleAndValidateUrl(TitleResolutionModelInterface $data): TitleResolutionModelInterface
public function processTitle(TitleResolutionModelInterface $data): TitleResolutionModelInterface
{
if ($data->hasTitle()) {
$this->urlValidator->validateUrl($data->getLongUrl(), $data->doValidateUrl());
if (! $this->options->autoResolveTitles || $data->hasTitle()) {
return $data;
}
$title = $this->urlValidator->validateUrlWithTitle($data->getLongUrl(), $data->doValidateUrl());
return $title === null ? $data : $data->withResolvedTitle($title);
$response = $this->fetchUrl($data->getLongUrl());
if ($response === null) {
return $data;
}
$contentType = strtolower($response->getHeaderLine('Content-Type'));
if (! str_starts_with($contentType, 'text/html')) {
return $data;
}
$title = $this->tryToResolveTitle($response);
return $title !== null ? $data->withResolvedTitle($title) : $data;
}
/**
 * Performs a best-effort GET request against the provided URL.
 *
 * Any error raised while requesting (including timeouts) is swallowed on purpose, as the result is
 * optional for the caller.
 *
 * @return ResponseInterface|null The received response, or null if the request failed for any reason
 */
private function fetchUrl(string $url): ?ResponseInterface
{
    try {
        return $this->httpClient->request(RequestMethodInterface::METHOD_GET, $url, [
            // Give up after 3 seconds, so that a non-responsive server cannot hang this forever.
            // A timeout makes the client throw, which is handled below like any other failure.
            RequestOptions::TIMEOUT => 3,
            RequestOptions::CONNECT_TIMEOUT => 3,
            // Prevent potential infinite redirection loops
            RequestOptions::ALLOW_REDIRECTS => ['max' => self::MAX_REDIRECTS],
            RequestOptions::IDN_CONVERSION => true,
            // Making the request with a browser's user agent results in responses closer to a real user
            RequestOptions::HEADERS => ['User-Agent' => self::CHROME_USER_AGENT],
            RequestOptions::STREAM => true, // This ensures large files are not fully downloaded if not needed
        ]);
    } catch (Throwable) {
        return null;
    }
}
/**
 * Tries to extract a title from the body of the provided response.
 *
 * The body is consumed in chunks of 1024 bytes, stopping as soon as a closing </title> tag has been
 * read or the end of the body is reached. The collected content is then matched against TITLE_TAG_VALUE.
 *
 * @return string|null The normalized value of the first capture group, or null if it is not set
 */
private function tryToResolveTitle(ResponseInterface $response): ?string
{
    $stream = $response->getBody();
    $html = '';

    // Walk the body only as far as needed, instead of loading it all at once
    while (! (str_contains($html, '</title>') || $stream->eof())) {
        $html .= $stream->read(1024);
    }

    preg_match(TITLE_TAG_VALUE, $html, $groups);
    if (! isset($groups[1])) {
        return null;
    }

    return $this->normalizeTitle($groups[1]);
}
/**
 * Cleans up a raw title: surrounding whitespace is trimmed first, then HTML entities are decoded.
 */
private function normalizeTitle(string $title): string
{
    $trimmedTitle = trim($title);

    return html_entity_decode($trimmedTitle);
}
}

View File

@@ -4,16 +4,12 @@ declare(strict_types=1);
namespace Shlinkio\Shlink\Core\ShortUrl\Helper;
use Shlinkio\Shlink\Core\Exception\InvalidUrlException;
interface ShortUrlTitleResolutionHelperInterface
{
/**
* @deprecated TODO Rename to processTitle once URL validation is removed with Shlink 4.0.0
* @template T of TitleResolutionModelInterface
* @param T $data
* @return T
* @throws InvalidUrlException
*/
public function processTitleAndValidateUrl(TitleResolutionModelInterface $data): TitleResolutionModelInterface;
public function processTitle(TitleResolutionModelInterface $data): TitleResolutionModelInterface;
}

View File

@@ -10,8 +10,5 @@ interface TitleResolutionModelInterface
public function getLongUrl(): string;
/** @deprecated */
public function doValidateUrl(): bool;
public function withResolvedTitle(string $title): static;
}

View File

@@ -35,8 +35,6 @@ final class ShortUrlCreation implements TitleResolutionModelInterface
public readonly bool $findIfExists = false,
public readonly ?string $domain = null,
public readonly int $shortCodeLength = 5,
/** @deprecated */
public readonly bool $validateUrl = false,
public readonly ?ApiKey $apiKey = null,
public readonly array $tags = [],
public readonly ?string $title = null,
@@ -75,7 +73,6 @@ final class ShortUrlCreation implements TitleResolutionModelInterface
$inputFilter,
ShortUrlInputFilter::SHORT_CODE_LENGTH,
) ?? DEFAULT_SHORT_CODES_LENGTH,
validateUrl: getOptionalBoolFromInputFilter($inputFilter, ShortUrlInputFilter::VALIDATE_URL) ?? false,
apiKey: $inputFilter->getValue(ShortUrlInputFilter::API_KEY),
tags: $inputFilter->getValue(ShortUrlInputFilter::TAGS),
title: $inputFilter->getValue(ShortUrlInputFilter::TITLE),
@@ -97,7 +94,6 @@ final class ShortUrlCreation implements TitleResolutionModelInterface
findIfExists: $this->findIfExists,
domain: $this->domain,
shortCodeLength: $this->shortCodeLength,
validateUrl: $this->validateUrl,
apiKey: $this->apiKey,
tags: $this->tags,
title: $title,
@@ -137,12 +133,6 @@ final class ShortUrlCreation implements TitleResolutionModelInterface
return $this->domain !== null;
}
/** @deprecated */
public function doValidateUrl(): bool
{
return $this->validateUrl;
}
public function hasTitle(): bool
{
return $this->title !== null;

View File

@@ -38,8 +38,6 @@ final class ShortUrlEdition implements TitleResolutionModelInterface
private readonly bool $titlePropWasProvided = false,
public readonly ?string $title = null,
public readonly bool $titleWasAutoResolved = false,
/** @deprecated */
public readonly bool $validateUrl = false,
private readonly bool $crawlablePropWasProvided = false,
public readonly bool $crawlable = false,
private readonly bool $forwardQueryPropWasProvided = false,
@@ -76,7 +74,6 @@ final class ShortUrlEdition implements TitleResolutionModelInterface
tags: $inputFilter->getValue(ShortUrlInputFilter::TAGS),
titlePropWasProvided: array_key_exists(ShortUrlInputFilter::TITLE, $data),
title: $inputFilter->getValue(ShortUrlInputFilter::TITLE),
validateUrl: getOptionalBoolFromInputFilter($inputFilter, ShortUrlInputFilter::VALIDATE_URL) ?? false,
crawlablePropWasProvided: array_key_exists(ShortUrlInputFilter::CRAWLABLE, $data),
crawlable: $inputFilter->getValue(ShortUrlInputFilter::CRAWLABLE),
forwardQueryPropWasProvided: array_key_exists(ShortUrlInputFilter::FORWARD_QUERY, $data),
@@ -102,7 +99,6 @@ final class ShortUrlEdition implements TitleResolutionModelInterface
titlePropWasProvided: $this->titlePropWasProvided,
title: $title,
titleWasAutoResolved: true,
validateUrl: $this->validateUrl,
crawlablePropWasProvided: $this->crawlablePropWasProvided,
crawlable: $this->crawlable,
forwardQueryPropWasProvided: $this->forwardQueryPropWasProvided,
@@ -155,12 +151,6 @@ final class ShortUrlEdition implements TitleResolutionModelInterface
return $this->titleWasAutoResolved;
}
/** @deprecated */
public function doValidateUrl(): bool
{
return $this->validateUrl;
}
public function crawlableWasProvided(): bool
{
return $this->crawlablePropWasProvided;

View File

@@ -36,8 +36,6 @@ class ShortUrlInputFilter extends InputFilter
public const SHORT_CODE_LENGTH = 'shortCodeLength';
public const LONG_URL = 'longUrl';
public const DEVICE_LONG_URLS = 'deviceLongUrls';
/** @deprecated */
public const VALIDATE_URL = 'validateUrl';
public const API_KEY = 'apiKey';
public const TAGS = 'tags';
public const TITLE = 'title';
@@ -97,9 +95,8 @@ class ShortUrlInputFilter extends InputFilter
$this->add($this->createBooleanInput(self::FIND_IF_EXISTS, false));
// These cannot be defined as a boolean inputs, because they can actually have 3 values: true, false and null.
// This cannot be defined as a boolean input, because it can actually have 3 values: true, false and null.
// Defining it as boolean will make null fall back to false, which is not the desired behavior.
$this->add($this->createInput(self::VALIDATE_URL, false));
$this->add($this->createInput(self::FORWARD_QUERY, false));
$domain = $this->createInput(self::DOMAIN, false);

View File

@@ -5,7 +5,6 @@ declare(strict_types=1);
namespace Shlinkio\Shlink\Core\ShortUrl;
use Doctrine\ORM;
use Shlinkio\Shlink\Core\Exception\InvalidUrlException;
use Shlinkio\Shlink\Core\Exception\ShortUrlNotFoundException;
use Shlinkio\Shlink\Core\ShortUrl\Entity\ShortUrl;
use Shlinkio\Shlink\Core\ShortUrl\Helper\ShortUrlTitleResolutionHelperInterface;
@@ -26,7 +25,6 @@ class ShortUrlService implements ShortUrlServiceInterface
/**
* @throws ShortUrlNotFoundException
* @throws InvalidUrlException
*/
public function updateShortUrl(
ShortUrlIdentifier $identifier,
@@ -34,7 +32,7 @@ class ShortUrlService implements ShortUrlServiceInterface
?ApiKey $apiKey = null,
): ShortUrl {
if ($shortUrlEdit->longUrlWasProvided()) {
$shortUrlEdit = $this->titleResolutionHelper->processTitleAndValidateUrl($shortUrlEdit);
$shortUrlEdit = $this->titleResolutionHelper->processTitle($shortUrlEdit);
}
$shortUrl = $this->urlResolver->resolveShortUrl($identifier, $apiKey);

View File

@@ -4,7 +4,6 @@ declare(strict_types=1);
namespace Shlinkio\Shlink\Core\ShortUrl;
use Shlinkio\Shlink\Core\Exception\InvalidUrlException;
use Shlinkio\Shlink\Core\Exception\ShortUrlNotFoundException;
use Shlinkio\Shlink\Core\ShortUrl\Entity\ShortUrl;
use Shlinkio\Shlink\Core\ShortUrl\Model\ShortUrlEdition;
@@ -15,7 +14,6 @@ interface ShortUrlServiceInterface
{
/**
* @throws ShortUrlNotFoundException
* @throws InvalidUrlException
*/
public function updateShortUrl(
ShortUrlIdentifier $identifier,

View File

@@ -8,7 +8,6 @@ use Doctrine\ORM\EntityManagerInterface;
use Psr\Container\ContainerExceptionInterface;
use Psr\EventDispatcher\EventDispatcherInterface;
use Shlinkio\Shlink\Core\EventDispatcher\Event\ShortUrlCreated;
use Shlinkio\Shlink\Core\Exception\InvalidUrlException;
use Shlinkio\Shlink\Core\Exception\NonUniqueSlugException;
use Shlinkio\Shlink\Core\ShortUrl\Entity\ShortUrl;
use Shlinkio\Shlink\Core\ShortUrl\Helper\ShortCodeUniquenessHelperInterface;
@@ -31,7 +30,6 @@ class UrlShortener implements UrlShortenerInterface
/**
* @throws NonUniqueSlugException
* @throws InvalidUrlException
*/
public function shorten(ShortUrlCreation $creation): UrlShorteningResult
{
@@ -41,7 +39,7 @@ class UrlShortener implements UrlShortenerInterface
return UrlShorteningResult::withoutErrorOnEventDispatching($existingShortUrl);
}
$creation = $this->titleResolutionHelper->processTitleAndValidateUrl($creation);
$creation = $this->titleResolutionHelper->processTitle($creation);
/** @var ShortUrl $newShortUrl */
$newShortUrl = $this->em->wrapInTransaction(function () use ($creation): ShortUrl {

View File

@@ -4,7 +4,6 @@ declare(strict_types=1);
namespace Shlinkio\Shlink\Core\ShortUrl;
use Shlinkio\Shlink\Core\Exception\InvalidUrlException;
use Shlinkio\Shlink\Core\Exception\NonUniqueSlugException;
use Shlinkio\Shlink\Core\ShortUrl\Model\ShortUrlCreation;
use Shlinkio\Shlink\Core\ShortUrl\Model\UrlShorteningResult;
@@ -13,7 +12,6 @@ interface UrlShortenerInterface
{
/**
* @throws NonUniqueSlugException
* @throws InvalidUrlException
*/
public function shorten(ShortUrlCreation $creation): UrlShorteningResult;
}

View File

@@ -1,116 +0,0 @@
<?php
declare(strict_types=1);
namespace Shlinkio\Shlink\Core\Util;
use Fig\Http\Message\RequestMethodInterface;
use GuzzleHttp\ClientInterface;
use GuzzleHttp\Exception\GuzzleException;
use GuzzleHttp\RequestOptions;
use Psr\Http\Message\ResponseInterface;
use Shlinkio\Shlink\Core\Exception\InvalidUrlException;
use Shlinkio\Shlink\Core\Options\UrlShortenerOptions;
use Throwable;
use function html_entity_decode;
use function preg_match;
use function str_contains;
use function str_starts_with;
use function strtolower;
use function trim;
use const Shlinkio\Shlink\TITLE_TAG_VALUE;
/**
 * Validates long URLs by requesting them over HTTP and, optionally, resolves their title from the
 * HTML returned by the request.
 *
 * @deprecated
 */
class UrlValidator implements UrlValidatorInterface, RequestMethodInterface
{
    private const MAX_REDIRECTS = 15;
    private const CHROME_USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) '
        . 'Chrome/112.0.0.0 Safari/537.36';

    public function __construct(private ClientInterface $httpClient, private UrlShortenerOptions $options)
    {
    }

    /**
     * Requests the URL when validation is enabled, and does nothing otherwise.
     *
     * @deprecated
     * @throws InvalidUrlException If validation is enabled and the request fails
     */
    public function validateUrl(string $url, bool $doValidate): void
    {
        if ($doValidate) {
            $this->validateUrlAndGetResponse($url);
        }
    }

    /**
     * Optionally validates the URL and, when title auto-resolution is enabled, tries to extract the
     * title from the response.
     *
     * @deprecated
     * @return string|null The normalized title, or null when it was not resolved
     * @throws InvalidUrlException If validation is enabled and the request fails
     */
    public function validateUrlWithTitle(string $url, bool $doValidate): ?string
    {
        if (! $this->options->autoResolveTitles) {
            // No title needs to be resolved, so a HEAD request is enough to validate the URL
            if ($doValidate) {
                $this->validateUrlAndGetResponse($url, self::METHOD_HEAD);
            }

            return null;
        }

        // Request errors are only propagated when validation was requested
        if ($doValidate) {
            $response = $this->validateUrlAndGetResponse($url);
        } else {
            $response = $this->getResponse($url);
        }

        if ($response === null) {
            return null;
        }

        $isHtml = str_starts_with(strtolower($response->getHeaderLine('Content-Type')), 'text/html');
        if (! $isHtml) {
            return null;
        }

        $stream = $response->getBody();
        $html = '';
        // With streaming enabled, the body is read only until the </title> tag is found
        while (true) {
            if (str_contains($html, '</title>') || $stream->eof()) {
                break;
            }

            $html .= $stream->read(1024);
        }

        preg_match(TITLE_TAG_VALUE, $html, $groups);
        if (! isset($groups[1])) {
            return null;
        }

        return $this->normalizeTitle($groups[1]);
    }

    /**
     * Requests the URL with the provided method, converting client errors into InvalidUrlException.
     *
     * @param self::METHOD_GET|self::METHOD_HEAD $method
     * @throws InvalidUrlException
     */
    private function validateUrlAndGetResponse(string $url, string $method = self::METHOD_GET): ResponseInterface
    {
        $requestOptions = [
            RequestOptions::ALLOW_REDIRECTS => ['max' => self::MAX_REDIRECTS],
            RequestOptions::IDN_CONVERSION => true,
            // Making the request with a browser's user agent makes the validation closer to a real user
            RequestOptions::HEADERS => ['User-Agent' => self::CHROME_USER_AGENT],
            RequestOptions::STREAM => true, // This ensures large files are not fully downloaded if not needed
        ];

        try {
            return $this->httpClient->request($method, $url, $requestOptions);
        } catch (GuzzleException $clientError) {
            throw InvalidUrlException::fromUrl($url, $clientError);
        }
    }

    /**
     * Same GET request as when validating, but any error results in null instead of an exception.
     */
    private function getResponse(string $url): ?ResponseInterface
    {
        try {
            $response = $this->validateUrlAndGetResponse($url);
        } catch (Throwable) {
            return null;
        }

        return $response;
    }

    /**
     * Trims surrounding whitespace from the title and then decodes its HTML entities.
     */
    private function normalizeTitle(string $title): string
    {
        $trimmedTitle = trim($title);

        return html_entity_decode($trimmedTitle);
    }
}

View File

@@ -1,23 +0,0 @@
<?php
declare(strict_types=1);
namespace Shlinkio\Shlink\Core\Util;
use Shlinkio\Shlink\Core\Exception\InvalidUrlException;
/**
 * Contract for services which validate long URLs and, optionally, resolve their title.
 *
 * @deprecated
 */
interface UrlValidatorInterface
{
/**
* Validates the provided URL. The $doValidate flag tells if validation should actually be performed
* (the UrlValidator implementation returns without doing anything when it is false).
*
* @deprecated
* @throws InvalidUrlException
*/
public function validateUrl(string $url, bool $doValidate): void;
/**
* Validates the provided URL as validateUrl() does, additionally trying to resolve its title.
* Returns the resolved title, or null when none was resolved.
*
* @deprecated
* @throws InvalidUrlException
*/
public function validateUrlWithTitle(string $url, bool $doValidate): ?string;
}