From 3ef02d46c08b846516c3d4385c33180d63ddb9e9 Mon Sep 17 00:00:00 2001 From: Alejandro Celaya Date: Sat, 22 May 2021 09:34:42 +0200 Subject: [PATCH] Added logic to resolve crawlable short codes --- module/Core/config/dependencies.config.php | 4 +- module/Core/src/Crawling/CrawlingHelper.php | 15 ++++++- .../src/Repository/ShortUrlRepository.php | 24 +++++++++++ .../ShortUrlRepositoryInterface.php | 2 + .../Core/src/Repository/VisitRepository.php | 4 +- .../Core/test/Crawling/CrawlingHelperTest.php | 43 +++++++++++++++++++ 6 files changed, 88 insertions(+), 4 deletions(-) create mode 100644 module/Core/test/Crawling/CrawlingHelperTest.php diff --git a/module/Core/config/dependencies.config.php b/module/Core/config/dependencies.config.php index 4eb3d60d..7dfd5df2 100644 --- a/module/Core/config/dependencies.config.php +++ b/module/Core/config/dependencies.config.php @@ -59,7 +59,7 @@ return [ Importer\ImportedLinksProcessor::class => ConfigAbstractFactory::class, - Crawling\CrawlingHelper::class => InvokableFactory::class, + Crawling\CrawlingHelper::class => ConfigAbstractFactory::class, ], 'aliases' => [ @@ -150,6 +150,8 @@ return [ Service\ShortUrl\ShortCodeHelper::class, Util\DoctrineBatchHelper::class, ], + + Crawling\CrawlingHelper::class => ['em'], ], ]; diff --git a/module/Core/src/Crawling/CrawlingHelper.php b/module/Core/src/Crawling/CrawlingHelper.php index ef54761b..5f688645 100644 --- a/module/Core/src/Crawling/CrawlingHelper.php +++ b/module/Core/src/Crawling/CrawlingHelper.php @@ -4,10 +4,23 @@ declare(strict_types=1); namespace Shlinkio\Shlink\Core\Crawling; +use Doctrine\ORM\EntityManagerInterface; +use Shlinkio\Shlink\Core\Entity\ShortUrl; +use Shlinkio\Shlink\Core\Repository\ShortUrlRepositoryInterface; + class CrawlingHelper implements CrawlingHelperInterface { + private EntityManagerInterface $em; + + public function __construct(EntityManagerInterface $em) + { + $this->em = $em; + } + public function listCrawlableShortCodes(): iterable { - return []; + /** @var ShortUrlRepositoryInterface $repo */ + $repo = $this->em->getRepository(ShortUrl::class); + yield from $repo->findCrawlableShortCodes(); } } diff --git a/module/Core/src/Repository/ShortUrlRepository.php b/module/Core/src/Repository/ShortUrlRepository.php index fe3b170c..eacf293b 100644 --- a/module/Core/src/Repository/ShortUrlRepository.php +++ b/module/Core/src/Repository/ShortUrlRepository.php @@ -288,4 +288,28 @@ class ShortUrlRepository extends EntitySpecificationRepository implements ShortU $qb->andWhere($qb->expr()->isNull('s.domain')); } } + + public function findCrawlableShortCodes(): iterable + { + $blockSize = 1000; + $qb = $this->getEntityManager()->createQueryBuilder(); + $qb->select('DISTINCT s.shortCode') + ->from(ShortUrl::class, 's') + ->where($qb->expr()->eq('s.crawlable', ':crawlable')) + ->setParameter('crawlable', true) + ->setMaxResults($blockSize); + + $page = 0; + do { + $qbClone = (clone $qb)->setFirstResult($blockSize * $page); + $iterator = $qbClone->getQuery()->toIterable(); + $resultsFound = false; + $page++; + + foreach ($iterator as ['shortCode' => $shortCode]) { + $resultsFound = true; + yield $shortCode; + } + } while ($resultsFound); + } } diff --git a/module/Core/src/Repository/ShortUrlRepositoryInterface.php b/module/Core/src/Repository/ShortUrlRepositoryInterface.php index 29485eeb..5d8fa924 100644 --- a/module/Core/src/Repository/ShortUrlRepositoryInterface.php +++ b/module/Core/src/Repository/ShortUrlRepositoryInterface.php @@ -41,4 +41,6 @@ interface ShortUrlRepositoryInterface extends ObjectRepository, EntitySpecificat public function findOneMatching(ShortUrlMeta $meta): ?ShortUrl; public function findOneByImportedUrl(ImportedShlinkUrl $url): ?ShortUrl; + + public function findCrawlableShortCodes(): iterable; } diff --git a/module/Core/src/Repository/VisitRepository.php b/module/Core/src/Repository/VisitRepository.php index d9c18977..35d6a535 100644 --- a/module/Core/src/Repository/VisitRepository.php +++ b/module/Core/src/Repository/VisitRepository.php @@ -66,11 +66,11 @@ class VisitRepository extends EntitySpecificationRepository implements VisitRepo do { $qb = (clone $originalQueryBuilder)->andWhere($qb->expr()->gt('v.id', $lastId)); - $iterator = $qb->getQuery()->iterate(); + $iterator = $qb->getQuery()->toIterable(); $resultsFound = false; /** @var Visit $visit */ - foreach ($iterator as $key => [$visit]) { + foreach ($iterator as $key => $visit) { $resultsFound = true; yield $key => $visit; } diff --git a/module/Core/test/Crawling/CrawlingHelperTest.php b/module/Core/test/Crawling/CrawlingHelperTest.php new file mode 100644 index 00000000..2c65ebac --- /dev/null +++ b/module/Core/test/Crawling/CrawlingHelperTest.php @@ -0,0 +1,43 @@ +em = $this->prophesize(EntityManagerInterface::class); + $this->helper = new CrawlingHelper($this->em->reveal()); + } + + /** @test */ + public function listCrawlableShortCodesDelegatesIntoRepository(): void + { + $repo = $this->prophesize(ShortUrlRepositoryInterface::class); + $findCrawlableShortCodes = $repo->findCrawlableShortCodes()->willReturn([]); + $getRepo = $this->em->getRepository(ShortUrl::class)->willReturn($repo->reveal()); + + $result = $this->helper->listCrawlableShortCodes(); + foreach ($result as $shortCode) { + // Result is a generator and therefore, it needs to be iterated + } + + $findCrawlableShortCodes->shouldHaveBeenCalledOnce(); + $getRepo->shouldHaveBeenCalledOnce(); + } +}