From 8b5409829974bd5af02bea8aaaf34238297c5906 Mon Sep 17 00:00:00 2001
From: Alejandro Celaya
Date: Wed, 3 Feb 2021 11:07:47 +0100
Subject: [PATCH] Added option to automatically resolve url titles

Short URLs created without a title get it resolved from the title tag of
the long URL's response when auto_resolve_titles is enabled. The request
is skipped when neither validation nor title resolution is enabled.
---
 config/autoload/url-shortener.global.php      |  1 +
 module/Core/src/Model/ShortUrlMeta.php        | 13 +++++++
 .../Core/src/Options/UrlShortenerOptions.php  | 12 ++++++
 module/Core/src/Service/UrlShortener.php      | 10 +++--
 module/Core/src/Util/UrlValidator.php         | 37 ++++++++++++++++++-
 .../Core/src/Util/UrlValidatorInterface.php   |  5 +++
 module/Core/test/Service/UrlShortenerTest.php |  4 +-
 7 files changed, 75 insertions(+), 7 deletions(-)

diff --git a/config/autoload/url-shortener.global.php b/config/autoload/url-shortener.global.php
index f27210af..f4a7966a 100644
--- a/config/autoload/url-shortener.global.php
+++ b/config/autoload/url-shortener.global.php
@@ -19,6 +19,7 @@ return [
         'default_short_codes_length' => DEFAULT_SHORT_CODES_LENGTH,
         'redirect_status_code' => DEFAULT_REDIRECT_STATUS_CODE,
         'redirect_cache_lifetime' => DEFAULT_REDIRECT_CACHE_LIFETIME,
+        'auto_resolve_titles' => false, // Deprecated value. It will default to true in Shlink 3.0.0
     ],
 
 ];
diff --git a/module/Core/src/Model/ShortUrlMeta.php b/module/Core/src/Model/ShortUrlMeta.php
index 65ff5e1e..a069062c 100644
--- a/module/Core/src/Model/ShortUrlMeta.php
+++ b/module/Core/src/Model/ShortUrlMeta.php
@@ -167,4 +167,17 @@ final class ShortUrlMeta
     {
         return $this->title;
     }
+
+    public function hasTitle(): bool
+    {
+        return $this->title !== null;
+    }
+
+    public function withResolvedTitle(?string $title): self
+    {
+        $copy = clone $this;
+        $copy->title = $title;
+
+        return $copy;
+    }
 }
diff --git a/module/Core/src/Options/UrlShortenerOptions.php b/module/Core/src/Options/UrlShortenerOptions.php
index 92bb7d07..553a160f 100644
--- a/module/Core/src/Options/UrlShortenerOptions.php
+++ b/module/Core/src/Options/UrlShortenerOptions.php
@@ -18,6 +18,7 @@ class UrlShortenerOptions extends AbstractOptions
     private bool $validateUrl = true;
     private int $redirectStatusCode = DEFAULT_REDIRECT_STATUS_CODE;
     private int $redirectCacheLifetime = DEFAULT_REDIRECT_CACHE_LIFETIME;
+    private bool $autoResolveTitles = false; // Deprecated value. It will default to true in Shlink 3.0.0
 
     public function isUrlValidationEnabled(): bool
     {
@@ -55,4 +56,15 @@ class UrlShortenerOptions extends AbstractOptions
             ? $redirectCacheLifetime
             : DEFAULT_REDIRECT_CACHE_LIFETIME;
     }
+
+    public function autoResolveTitles(): bool
+    {
+        return $this->autoResolveTitles;
+    }
+
+    protected function setAutoResolveTitles(bool $autoResolveTitles): self
+    {
+        $this->autoResolveTitles = $autoResolveTitles;
+        return $this;
+    }
 }
diff --git a/module/Core/src/Service/UrlShortener.php b/module/Core/src/Service/UrlShortener.php
index f8125524..aa0908fe 100644
--- a/module/Core/src/Service/UrlShortener.php
+++ b/module/Core/src/Service/UrlShortener.php
@@ -13,7 +13,6 @@ use Shlinkio\Shlink\Core\Repository\ShortUrlRepositoryInterface;
 use Shlinkio\Shlink\Core\Service\ShortUrl\ShortCodeHelperInterface;
 use Shlinkio\Shlink\Core\ShortUrl\Resolver\ShortUrlRelationResolverInterface;
 use Shlinkio\Shlink\Core\Util\UrlValidatorInterface;
-use Throwable;
 
 class UrlShortener implements UrlShortenerInterface
 {
@@ -37,7 +36,6 @@ class UrlShortener implements UrlShortenerInterface
     /**
      * @throws NonUniqueSlugException
      * @throws InvalidUrlException
-     * @throws Throwable
      */
     public function shorten(ShortUrlMeta $meta): ShortUrl
     {
@@ -47,7 +45,13 @@ class UrlShortener implements UrlShortenerInterface
             return $existingShortUrl;
         }
 
-        $this->urlValidator->validateUrl($meta->getLongUrl(), $meta->doValidateUrl());
+        if ($meta->hasTitle()) {
+            $this->urlValidator->validateUrl($meta->getLongUrl(), $meta->doValidateUrl());
+        } else {
+            $meta = $meta->withResolvedTitle(
+                $this->urlValidator->validateUrlWithTitle($meta->getLongUrl(), $meta->doValidateUrl()),
+            );
+        }
 
         return $this->em->transactional(function () use ($meta) {
             $shortUrl = ShortUrl::fromMeta($meta, $this->relationResolver);
diff --git a/module/Core/src/Util/UrlValidator.php b/module/Core/src/Util/UrlValidator.php
index ccf69dd1..1f590de5 100644
--- a/module/Core/src/Util/UrlValidator.php
+++ b/module/Core/src/Util/UrlValidator.php
@@ -8,9 +8,12 @@ use Fig\Http\Message\RequestMethodInterface;
 use GuzzleHttp\ClientInterface;
 use GuzzleHttp\Exception\GuzzleException;
 use GuzzleHttp\RequestOptions;
+use Psr\Http\Message\ResponseInterface;
 use Shlinkio\Shlink\Core\Exception\InvalidUrlException;
 use Shlinkio\Shlink\Core\Options\UrlShortenerOptions;
 
+use function preg_match;
+
 class UrlValidator implements UrlValidatorInterface, RequestMethodInterface
 {
     private const MAX_REDIRECTS = 15;
@@ -35,13 +38,43 @@ class UrlValidator implements UrlValidatorInterface, RequestMethodInterface
             return;
         }
 
+        $this->validateUrlAndGetResponse($url, true);
+    }
+
+    public function validateUrlWithTitle(string $url, ?bool $doValidate): ?string
+    {
+        $doValidate = $doValidate ?? $this->options->isUrlValidationEnabled();
+        $resolveTitle = $this->options->autoResolveTitles();
+
+        // Nothing to validate and no title to resolve, so skip the HTTP request entirely
+        if (! $doValidate && ! $resolveTitle) {
+            return null;
+        }
+
+        $response = $this->validateUrlAndGetResponse($url, $doValidate);
+        if ($response === null || ! $resolveTitle) {
+            return null;
+        }
+
+        // Capture only the text between the opening and closing title tags
+        $body = $response->getBody()->__toString();
+        preg_match('/<title[^>]*>(.+?)<\/title>/i', $body, $matches);
+        return $matches[1] ?? null;
+    }
+
+    private function validateUrlAndGetResponse(string $url, bool $throwOnError): ?ResponseInterface
+    {
         try {
-            $this->httpClient->request(self::METHOD_GET, $url, [
+            return $this->httpClient->request(self::METHOD_GET, $url, [
                 RequestOptions::ALLOW_REDIRECTS => ['max' => self::MAX_REDIRECTS],
                 RequestOptions::IDN_CONVERSION => true,
             ]);
         } catch (GuzzleException $e) {
-            throw InvalidUrlException::fromUrl($url, $e);
+            if ($throwOnError) {
+                throw InvalidUrlException::fromUrl($url, $e);
+            }
+
+            return null;
         }
     }
 }
diff --git a/module/Core/src/Util/UrlValidatorInterface.php b/module/Core/src/Util/UrlValidatorInterface.php
index fdf1e781..f198d301 100644
--- a/module/Core/src/Util/UrlValidatorInterface.php
+++ b/module/Core/src/Util/UrlValidatorInterface.php
@@ -12,4 +12,9 @@ interface UrlValidatorInterface
      * @throws InvalidUrlException
      */
     public function validateUrl(string $url, ?bool $doValidate): void;
+
+    /**
+     * @throws InvalidUrlException
+     */
+    public function validateUrlWithTitle(string $url, ?bool $doValidate): ?string;
 }
diff --git a/module/Core/test/Service/UrlShortenerTest.php b/module/Core/test/Service/UrlShortenerTest.php
index a9ba783f..24abf69f 100644
--- a/module/Core/test/Service/UrlShortenerTest.php
+++ b/module/Core/test/Service/UrlShortenerTest.php
@@ -31,7 +31,7 @@ class UrlShortenerTest extends TestCase
     public function setUp(): void
     {
         $this->urlValidator = $this->prophesize(UrlValidatorInterface::class);
-        $this->urlValidator->validateUrl('http://foobar.com/12345/hello?foo=bar', null)->will(
+        $this->urlValidator->validateUrlWithTitle('http://foobar.com/12345/hello?foo=bar', null)->will(
             function (): void {
             },
         );
@@ -101,7 +101,7 @@ class UrlShortenerTest extends TestCase
         $findExisting->shouldHaveBeenCalledOnce();
         $getRepo->shouldHaveBeenCalledOnce();
         $this->em->persist(Argument::cetera())->shouldNotHaveBeenCalled();
-        $this->urlValidator->validateUrl(Argument::cetera())->shouldNotHaveBeenCalled();
+        $this->urlValidator->validateUrlWithTitle(Argument::cetera())->shouldNotHaveBeenCalled();
         self::assertSame($expected, $result);
     }
 