diff --git a/module/Core/src/Repository/VisitRepository.php b/module/Core/src/Repository/VisitRepository.php index 28eb2466..aa3aa57d 100644 --- a/module/Core/src/Repository/VisitRepository.php +++ b/module/Core/src/Repository/VisitRepository.php @@ -5,32 +5,70 @@ declare(strict_types=1); namespace Shlinkio\Shlink\Core\Repository; use Doctrine\ORM\EntityRepository; +use Doctrine\ORM\Query; use Doctrine\ORM\QueryBuilder; use Shlinkio\Shlink\Common\Util\DateRange; use Shlinkio\Shlink\Core\Entity\Visit; class VisitRepository extends EntityRepository implements VisitRepositoryInterface { + private const DEFAULT_BLOCK_SIZE = 10000; + /** * This method will allow you to iterate the whole list of unlocated visits, but loading them into memory in * smaller blocks of a specific size. * This will have side effects if you update those rows while you iterate them, in a way that they are no longer * unlocated. * If you plan to do so, pass the first argument as false in order to disable applying offsets while slicing the - * dataset + * dataset. * * @return iterable|Visit[] */ - public function findUnlocatedVisits(bool $applyOffset = true, int $blockSize = self::DEFAULT_BLOCK_SIZE): iterable + public function findUnlocatedVisits(bool $applyOffset = true): iterable { $dql = <<getEntityManager()->createQuery($dql) - ->setMaxResults($blockSize); + $query = $this->getEntityManager()->createQuery($dql); $remainingVisitsToProcess = $this->count(['visitLocation' => null]); + + return $this->findVisitsForQuery($query, $remainingVisitsToProcess, $applyOffset); + } + + /** + * This method will allow you to iterate the whole list of unlocated visits, but loading them into memory in + * smaller blocks of a specific size. + * This will have side effects if you update those rows while you iterate them, in a way that they are no longer + * unlocated. + * If you plan to do so, pass the first argument as false in order to disable applying offsets while slicing the + * dataset. + * + * @return iterable|Visit[] + */ + public function findVisitsWithEmptyLocation(bool $applyOffset = true): iterable + { + $qb = $this->getEntityManager()->createQueryBuilder(); + $qb->from(Visit::class, 'v') + ->join('v.visitLocation', 'vl') + ->where($qb->expr()->isNotNull('v.visitLocation')) + ->andWhere($qb->expr()->eq('vl.isEmpty', ':isEmpty')) + ->setParameter('isEmpty', true); + $countQb = clone $qb; + + $query = $qb->select('v')->getQuery(); + $remainingVisitsToProcess = (int) $countQb->select('COUNT(DISTINCT v.id)')->getQuery()->getSingleScalarResult(); + + return $this->findVisitsForQuery($query, $remainingVisitsToProcess, $applyOffset); + } + + private function findVisitsForQuery(Query $query, int $remainingVisitsToProcess, bool $applyOffset = true): iterable + { + $blockSize = self::DEFAULT_BLOCK_SIZE; + $query = $query->setMaxResults($blockSize); $offset = 0; + // FIXME Do not use the $applyOffset workaround. Instead, always start with first result, but skip already + // processed results. That should work both if any entry is edited or not while ($remainingVisitsToProcess > 0) { $iterator = $query->setFirstResult($applyOffset ? $offset : null)->iterate(); foreach ($iterator as $key => [$value]) { diff --git a/module/Core/src/Repository/VisitRepositoryInterface.php b/module/Core/src/Repository/VisitRepositoryInterface.php index 0d0b66d0..ceb07564 100644 --- a/module/Core/src/Repository/VisitRepositoryInterface.php +++ b/module/Core/src/Repository/VisitRepositoryInterface.php @@ -10,18 +10,29 @@ use Shlinkio\Shlink\Core\Entity\Visit; interface VisitRepositoryInterface extends ObjectRepository { - public const DEFAULT_BLOCK_SIZE = 10000; + /** + * This method will allow you to iterate the whole list of unlocated visits, but loading them into memory in + * smaller blocks of a specific size. + * This will have side effects if you update those rows while you iterate them, in a way that they are no longer + * unlocated. + * If you plan to do so, pass the first argument as false in order to disable applying offsets while slicing the + * dataset. + * + * @return iterable|Visit[] + */ + public function findUnlocatedVisits(bool $applyOffset = true): iterable; /** * This method will allow you to iterate the whole list of unlocated visits, but loading them into memory in * smaller blocks of a specific size. - * This will have side effects if you update those rows while you iterate them. + * This will have side effects if you update those rows while you iterate them, in a way that they are no longer + * unlocated. * If you plan to do so, pass the first argument as false in order to disable applying offsets while slicing the - * dataset + * dataset. * * @return iterable|Visit[] */ - public function findUnlocatedVisits(bool $applyOffset = true, int $blockSize = self::DEFAULT_BLOCK_SIZE): iterable; + public function findVisitsWithEmptyLocation(bool $applyOffset = true): iterable; /** * @return Visit[] diff --git a/module/Core/src/Visit/VisitLocator.php b/module/Core/src/Visit/VisitLocator.php index bcfc9e33..9c6ed098 100644 --- a/module/Core/src/Visit/VisitLocator.php +++ b/module/Core/src/Visit/VisitLocator.php @@ -29,7 +29,9 @@ class VisitLocator implements VisitLocatorInterface public function locateVisitsWithEmptyLocation(callable $geolocateVisit, callable $notifyVisitWithLocation): void { - $this->locateVisits([], $geolocateVisit, $notifyVisitWithLocation); + /** @var VisitRepository $repo */ + $repo = $this->em->getRepository(Visit::class); + $this->locateVisits($repo->findVisitsWithEmptyLocation(false), $geolocateVisit, $notifyVisitWithLocation); } /** diff --git a/module/Core/test-db/Repository/VisitRepositoryTest.php b/module/Core/test-db/Repository/VisitRepositoryTest.php index bd1a0f2d..60420e70 100644 --- a/module/Core/test-db/Repository/VisitRepositoryTest.php +++ b/module/Core/test-db/Repository/VisitRepositoryTest.php @@ -36,19 +36,24 @@ class VisitRepositoryTest extends DatabaseTestCase $this->repo = $this->getEntityManager()->getRepository(Visit::class); } - /** - * @test - * @dataProvider provideBlockSize - */ - public function findUnlocatedVisitsReturnsProperVisits(int $blockSize): void + /** @test */ + public function findVisitsReturnsProperVisits(): void { $shortUrl = new ShortUrl(''); $this->getEntityManager()->persist($shortUrl); + $countIterable = function (iterable $results): int { + $resultsCount = 0; + foreach ($results as $value) { + $resultsCount++; + } + + return $resultsCount; + }; for ($i = 0; $i < 6; $i++) { $visit = new Visit($shortUrl, Visitor::emptyInstance()); - if ($i % 2 === 0) { + if ($i >= 2) { $location = new VisitLocation(Location::emptyInstance()); $this->getEntityManager()->persist($location); $visit->locate($location); @@ -58,18 +63,13 @@ class VisitRepositoryTest extends DatabaseTestCase } $this->getEntityManager()->flush(); - $resultsCount = 0; - $results = $this->repo->findUnlocatedVisits(true, $blockSize); - foreach ($results as $value) { - $resultsCount++; - } + $withEmptyLocation = $this->repo->findVisitsWithEmptyLocation(); + $unlocated = $this->repo->findUnlocatedVisits(); - $this->assertEquals(3, $resultsCount); - } - - public function provideBlockSize(): iterable - { - return map(range(1, 5), fn (int $value) => [$value]); + // Important! assertCount will not work here, as this iterable object loads data dynamically and counts to + // 0 if not iterated + $this->assertEquals(2, $countIterable($unlocated)); + $this->assertEquals(4, $countIterable($withEmptyLocation)); } /** @test */