diff --git a/module/Core/src/Repository/VisitRepository.php b/module/Core/src/Repository/VisitRepository.php index aa3aa57d..ed18df10 100644 --- a/module/Core/src/Repository/VisitRepository.php +++ b/module/Core/src/Repository/VisitRepository.php @@ -5,79 +5,61 @@ declare(strict_types=1); namespace Shlinkio\Shlink\Core\Repository; use Doctrine\ORM\EntityRepository; -use Doctrine\ORM\Query; use Doctrine\ORM\QueryBuilder; use Shlinkio\Shlink\Common\Util\DateRange; use Shlinkio\Shlink\Core\Entity\Visit; class VisitRepository extends EntityRepository implements VisitRepositoryInterface { - private const DEFAULT_BLOCK_SIZE = 10000; - /** - * This method will allow you to iterate the whole list of unlocated visits, but loading them into memory in - * smaller blocks of a specific size. - * This will have side effects if you update those rows while you iterate them, in a way that they are no longer - * unlocated. - * If you plan to do so, pass the first argument as false in order to disable applying offsets while slicing the - * dataset. - * * @return iterable|Visit[] */ - public function findUnlocatedVisits(bool $applyOffset = true): iterable + public function findUnlocatedVisits(int $blockSize = self::DEFAULT_BLOCK_SIZE): iterable { - $dql = <<getEntityManager()->createQuery($dql); - $remainingVisitsToProcess = $this->count(['visitLocation' => null]); + $qb = $this->getEntityManager()->createQueryBuilder(); + $qb->select('v') + ->from(Visit::class, 'v') + ->where($qb->expr()->isNull('v.visitLocation')); - return $this->findVisitsForQuery($query, $remainingVisitsToProcess, $applyOffset); + return $this->findVisitsForQuery($qb, $blockSize); } /** - * This method will allow you to iterate the whole list of unlocated visits, but loading them into memory in - * smaller blocks of a specific size. - * This will have side effects if you update those rows while you iterate them, in a way that they are no longer - * unlocated. - * If you plan to do so, pass the first argument as false in order to disable applying offsets while slicing the - * dataset. - * * @return iterable|Visit[] */ - public function findVisitsWithEmptyLocation(bool $applyOffset = true): iterable + public function findVisitsWithEmptyLocation(int $blockSize = self::DEFAULT_BLOCK_SIZE): iterable { $qb = $this->getEntityManager()->createQueryBuilder(); - $qb->from(Visit::class, 'v') + $qb->select('v') + ->from(Visit::class, 'v') ->join('v.visitLocation', 'vl') ->where($qb->expr()->isNotNull('v.visitLocation')) ->andWhere($qb->expr()->eq('vl.isEmpty', ':isEmpty')) ->setParameter('isEmpty', true); - $countQb = clone $qb; - $query = $qb->select('v')->getQuery(); - $remainingVisitsToProcess = (int) $countQb->select('COUNT(DISTINCT v.id)')->getQuery()->getSingleScalarResult(); - - return $this->findVisitsForQuery($query, $remainingVisitsToProcess, $applyOffset); + return $this->findVisitsForQuery($qb, $blockSize); } - private function findVisitsForQuery(Query $query, int $remainingVisitsToProcess, bool $applyOffset = true): iterable + private function findVisitsForQuery(QueryBuilder $qb, int $blockSize): iterable { - $blockSize = self::DEFAULT_BLOCK_SIZE; - $query = $query->setMaxResults($blockSize); - $offset = 0; + $originalQueryBuilder = $qb->setMaxResults($blockSize) + ->orderBy('v.id', 'ASC'); + $lastId = '0'; - // FIXME Do not use the $applyOffset workaround. Instead, always start with first result, but skip already - // processed results. That should work both if any entry is edited or not - while ($remainingVisitsToProcess > 0) { - $iterator = $query->setFirstResult($applyOffset ? $offset : null)->iterate(); - foreach ($iterator as $key => [$value]) { - yield $key => $value; + do { + $qb = (clone $originalQueryBuilder)->andWhere($qb->expr()->gt('v.id', $lastId)); + $iterator = $qb->getQuery()->iterate(); + $resultsFound = false; + + /** @var Visit $visit */ + foreach ($iterator as $key => [$visit]) { + $resultsFound = true; + yield $key => $visit; } - $remainingVisitsToProcess -= $blockSize; - $offset += $blockSize; - } + // As the query is ordered by ID, we can take the last one every time in order to exclude the whole list + $lastId = isset($visit) ? $visit->getId() : $lastId; + } while ($resultsFound); } /** diff --git a/module/Core/src/Repository/VisitRepositoryInterface.php b/module/Core/src/Repository/VisitRepositoryInterface.php index ceb07564..b076cff3 100644 --- a/module/Core/src/Repository/VisitRepositoryInterface.php +++ b/module/Core/src/Repository/VisitRepositoryInterface.php @@ -10,29 +10,17 @@ use Shlinkio\Shlink\Core\Entity\Visit; interface VisitRepositoryInterface extends ObjectRepository { - /** - * This method will allow you to iterate the whole list of unlocated visits, but loading them into memory in - * smaller blocks of a specific size. - * This will have side effects if you update those rows while you iterate them, in a way that they are no longer - * unlocated. - * If you plan to do so, pass the first argument as false in order to disable applying offsets while slicing the - * dataset. - * - * @return iterable|Visit[] - */ - public function findUnlocatedVisits(bool $applyOffset = true): iterable; + public const DEFAULT_BLOCK_SIZE = 10000; /** - * This method will allow you to iterate the whole list of unlocated visits, but loading them into memory in - * smaller blocks of a specific size. - * This will have side effects if you update those rows while you iterate them, in a way that they are no longer - * unlocated. - * If you plan to do so, pass the first argument as false in order to disable applying offsets while slicing the - * dataset. - * * @return iterable|Visit[] */ - public function findVisitsWithEmptyLocation(bool $applyOffset = true): iterable; + public function findUnlocatedVisits(int $defaultBlockSize = self::DEFAULT_BLOCK_SIZE): iterable; + + /** + * @return iterable|Visit[] + */ + public function findVisitsWithEmptyLocation(int $defaultBlockSize = self::DEFAULT_BLOCK_SIZE): iterable; /** * @return Visit[] diff --git a/module/Core/src/Visit/VisitLocator.php b/module/Core/src/Visit/VisitLocator.php index 9c6ed098..ee5acdf3 100644 --- a/module/Core/src/Visit/VisitLocator.php +++ b/module/Core/src/Visit/VisitLocator.php @@ -24,14 +24,14 @@ class VisitLocator implements VisitLocatorInterface { /** @var VisitRepository $repo */ $repo = $this->em->getRepository(Visit::class); - $this->locateVisits($repo->findUnlocatedVisits(false), $geolocateVisit, $notifyVisitWithLocation); + $this->locateVisits($repo->findUnlocatedVisits(), $geolocateVisit, $notifyVisitWithLocation); } public function locateVisitsWithEmptyLocation(callable $geolocateVisit, callable $notifyVisitWithLocation): void { /** @var VisitRepository $repo */ $repo = $this->em->getRepository(Visit::class); - $this->locateVisits($repo->findVisitsWithEmptyLocation(false), $geolocateVisit, $notifyVisitWithLocation); + $this->locateVisits($repo->findVisitsWithEmptyLocation(), $geolocateVisit, $notifyVisitWithLocation); } /** diff --git a/module/Core/test-db/Repository/VisitRepositoryTest.php b/module/Core/test-db/Repository/VisitRepositoryTest.php index 60420e70..b72453d6 100644 --- a/module/Core/test-db/Repository/VisitRepositoryTest.php +++ b/module/Core/test-db/Repository/VisitRepositoryTest.php @@ -36,8 +36,11 @@ class VisitRepositoryTest extends DatabaseTestCase $this->repo = $this->getEntityManager()->getRepository(Visit::class); } - /** @test */ - public function findVisitsReturnsProperVisits(): void + /** + * @test + * @dataProvider provideBlockSize + */ + public function findVisitsReturnsProperVisits(int $blockSize): void { $shortUrl = new ShortUrl(''); $this->getEntityManager()->persist($shortUrl); @@ -63,8 +66,8 @@ class VisitRepositoryTest extends DatabaseTestCase } $this->getEntityManager()->flush(); - $withEmptyLocation = $this->repo->findVisitsWithEmptyLocation(); - $unlocated = $this->repo->findUnlocatedVisits(); + $withEmptyLocation = $this->repo->findVisitsWithEmptyLocation($blockSize); + $unlocated = $this->repo->findUnlocatedVisits($blockSize); // Important! assertCount will not work here, as this iterable object loads data dynamically and counts to // 0 if not iterated @@ -72,6 +75,11 @@ class VisitRepositoryTest extends DatabaseTestCase $this->assertEquals(4, $countIterable($withEmptyLocation)); } + public function provideBlockSize(): iterable + { + return map(range(1, 10), fn (int $value) => [$value]); + } + /** @test */ public function findVisitsByShortCodeReturnsProperData(): void { diff --git a/module/Core/test/Visit/VisitLocatorTest.php b/module/Core/test/Visit/VisitLocatorTest.php index 400a27c7..9e223e15 100644 --- a/module/Core/test/Visit/VisitLocatorTest.php +++ b/module/Core/test/Visit/VisitLocatorTest.php @@ -46,7 +46,7 @@ class VisitLocatorTest extends TestCase ); $repo = $this->prophesize(VisitRepository::class); - $findUnlocatedVisits = $repo->findUnlocatedVisits(false)->willReturn($unlocatedVisits); + $findUnlocatedVisits = $repo->findUnlocatedVisits()->willReturn($unlocatedVisits); $getRepo = $this->em->getRepository(Visit::class)->willReturn($repo->reveal()); $persist = $this->em->persist(Argument::type(Visit::class))->will(function (): void { @@ -81,7 +81,7 @@ class VisitLocatorTest extends TestCase ]; $repo = $this->prophesize(VisitRepository::class); - $findUnlocatedVisits = $repo->findUnlocatedVisits(false)->willReturn($unlocatedVisits); + $findUnlocatedVisits = $repo->findUnlocatedVisits()->willReturn($unlocatedVisits); $getRepo = $this->em->getRepository(Visit::class)->willReturn($repo->reveal()); $persist = $this->em->persist(Argument::type(Visit::class))->will(function (): void {