From ec17eb3fbc542ae6945491c445649daf72098387 Mon Sep 17 00:00:00 2001 From: Alejandro Celaya Date: Sun, 22 May 2022 08:29:26 +0200 Subject: [PATCH 1/2] Ensured html entities are parsed when auto-resolving titles --- module/Core/src/Util/UrlValidator.php | 8 +++++++- module/Core/test/Util/UrlValidatorTest.php | 4 ++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/module/Core/src/Util/UrlValidator.php b/module/Core/src/Util/UrlValidator.php index 6bd8d76f..2e2965b1 100644 --- a/module/Core/src/Util/UrlValidator.php +++ b/module/Core/src/Util/UrlValidator.php @@ -13,6 +13,7 @@ use Shlinkio\Shlink\Core\Exception\InvalidUrlException; use Shlinkio\Shlink\Core\Options\UrlShortenerOptions; use Throwable; +use function html_entity_decode; use function preg_match; use function str_contains; use function str_starts_with; @@ -71,7 +72,7 @@ class UrlValidator implements UrlValidatorInterface, RequestMethodInterface $collectedBody .= $body->read(1024); } preg_match(TITLE_TAG_VALUE, $collectedBody, $matches); - return isset($matches[1]) ? trim($matches[1]) : null; + return isset($matches[1]) ? 
$this->normalizeTitle($matches[1]) : null; } /** @@ -101,4 +102,9 @@ class UrlValidator implements UrlValidatorInterface, RequestMethodInterface return null; } } + + private function normalizeTitle(string $title): string + { + return html_entity_decode(trim($title)); + } } diff --git a/module/Core/test/Util/UrlValidatorTest.php b/module/Core/test/Util/UrlValidatorTest.php index 7e1f9a1b..8aba6598 100644 --- a/module/Core/test/Util/UrlValidatorTest.php +++ b/module/Core/test/Util/UrlValidatorTest.php @@ -128,7 +128,7 @@ class UrlValidatorTest extends TestCase $result = $this->urlValidator->validateUrlWithTitle('http://foobar.com/12345/hello?foo=bar', true); - self::assertEquals('Resolved title', $result); + self::assertEquals('Resolved "title"', $result); $request->shouldHaveBeenCalledOnce(); } @@ -162,7 +162,7 @@ class UrlValidatorTest extends TestCase private function respWithTitle(): Response { - $body = $this->createStreamWithContent('<title data-foo="bar"> Resolved title</title>'); + $body = $this->createStreamWithContent('<title data-foo="bar"> Resolved &quot;title&quot; </title>'); return new Response($body, 200, ['Content-Type' => 'TEXT/html; charset=utf-8']); } From f224bb98c4ef83c901f4345657c5af9c9072206f Mon Sep 17 00:00:00 2001 From: Alejandro Celaya Date: Sun, 22 May 2022 08:30:46 +0200 Subject: [PATCH 2/2] Updated changelog --- CHANGELOG.md | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 97153daf..56c66d15 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,23 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com), and this project adheres to [Semantic Versioning](https://semver.org). +## [Unreleased] +### Added +* *Nothing* + +### Changed +* *Nothing* + +### Deprecated +* *Nothing* + +### Removed +* *Nothing* + +### Fixed +* [#1448](https://github.com/shlinkio/shlink/issues/1448) Fixed HTML entities not being properly parsed when auto-resolving page titles.
+ + ## [3.1.1] - 2022-05-09 ### Added * *Nothing*