diff --git a/CHANGELOG.md b/CHANGELOG.md index c5d6f749..22176f7e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,6 +25,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com), and this * `disable_ua_tracking`: If true, the user agent will not be tracked. * [#955](https://github.com/shlinkio/shlink/issues/955) Added new option to set short URLs as crawlable, making them be listed in the robots.txt as Allowed. +* [#900](https://github.com/shlinkio/shlink/issues/900) Shlink now tries to detect if the visit is coming from a potential bot or crawler, and allows to exclude those visits from visits lists if desired. ### Changed * [#1036](https://github.com/shlinkio/shlink/issues/1036) Updated to `happyr/doctrine-specification` 2.0. diff --git a/composer.json b/composer.json index 2474cebf..0bb1186a 100644 --- a/composer.json +++ b/composer.json @@ -25,6 +25,7 @@ "geoip2/geoip2": "^2.9", "guzzlehttp/guzzle": "^7.0", "happyr/doctrine-specification": "^2.0", + "jaybizzle/crawler-detect": "^1.2", "laminas/laminas-config": "^3.3", "laminas/laminas-config-aggregator": "^1.1", "laminas/laminas-diactoros": "^2.1.3", diff --git a/data/migrations/Version20210522124633.php b/data/migrations/Version20210522124633.php new file mode 100644 index 00000000..ea486e93 --- /dev/null +++ b/data/migrations/Version20210522124633.php @@ -0,0 +1,28 @@ +getTable('visits'); + $this->skipIf($visits->hasColumn(self::POTENTIAL_BOT_COLUMN)); + $visits->addColumn(self::POTENTIAL_BOT_COLUMN, Types::BOOLEAN, ['default' => false]); + } + + public function down(Schema $schema): void + { + $visits = $schema->getTable('visits'); + $this->skipIf(! $visits->hasColumn(self::POTENTIAL_BOT_COLUMN)); + $visits->dropColumn(self::POTENTIAL_BOT_COLUMN); + } +} diff --git a/docs/async-api/async-api.json b/docs/async-api/async-api.json index 3360d897..0b546377 100644 --- a/docs/async-api/async-api.json +++ b/docs/async-api/async-api.json @@ -190,6 +190,10 @@ }, "visitLocation": { "$ref": "#/components/schemas/VisitLocation" + }, + "potentialBot": { + "type": "boolean", + "description": "Tells if Shlink thinks this visit comes potentially from a bot or crawler" } }, "example": { @@ -204,7 +208,8 @@ "longitude": -122.0946, "regionName": "California", "timezone": "America/Los_Angeles" - } + }, + "potentialBot": false } }, "OrphanVisit": { @@ -243,6 +248,7 @@ "regionName": "California", "timezone": "America/Los_Angeles" }, + "potentialBot": false, "visitedUrl": "https://doma.in", "type": "base_url" } diff --git a/docs/swagger/definitions/Visit.json b/docs/swagger/definitions/Visit.json index e004e4fe..ecb6b9f9 100644 --- a/docs/swagger/definitions/Visit.json +++ b/docs/swagger/definitions/Visit.json @@ -17,6 +17,10 @@ }, "visitLocation": { "$ref": "./VisitLocation.json" + }, + "potentialBot": { + "type": "boolean", + "description": "Tells if Shlink thinks this visit comes potentially from a bot or crawler" } } } diff --git a/docs/swagger/paths/v1_short-urls_{shortCode}_visits.json b/docs/swagger/paths/v1_short-urls_{shortCode}_visits.json index 03d66a99..e5bbbe86 100644 --- a/docs/swagger/paths/v1_short-urls_{shortCode}_visits.json +++ b/docs/swagger/paths/v1_short-urls_{shortCode}_visits.json @@ -57,6 +57,16 @@ "schema": { "type": "number" } + }, + { + "name": "excludeBots", + "in": "query", + "description": "Tells if visits from potential bots should be excluded from the result set", + "required": false, + "schema": { + "type": "string", + "enum": ["true"] + } } ], "security": [ @@ -98,7 +108,8 @@ "referer": "https://twitter.com", "date": "2015-08-20T05:05:03+04:00", "userAgent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:47.0) Gecko/20100101 Firefox/47.0 Mozilla/5.0 (Macintosh; Intel Mac OS X x.y; rv:42.0) Gecko/20100101 Firefox/42.0", - "visitLocation": null + "visitLocation": null, + "potentialBot": false }, { "referer": "https://t.co", @@ -112,13 +123,15 @@ "longitude": -122.0946, "regionName": "California", "timezone": "America/Los_Angeles" - } + }, + "potentialBot": false }, { "referer": null, "date": "2015-08-20T05:05:03+04:00", "userAgent": "some_web_crawler/1.4", - "visitLocation": null + "visitLocation": null, + "potentialBot": true } ], "pagination": { diff --git a/docs/swagger/paths/v2_tags_{tag}_visits.json b/docs/swagger/paths/v2_tags_{tag}_visits.json index d9d9dda7..df1242f6 100644 --- a/docs/swagger/paths/v2_tags_{tag}_visits.json +++ b/docs/swagger/paths/v2_tags_{tag}_visits.json @@ -54,6 +54,16 @@ "schema": { "type": "number" } + }, + { + "name": "excludeBots", + "in": "query", + "description": "Tells if visits from potential bots should be excluded from the result set", + "required": false, + "schema": { + "type": "string", + "enum": ["true"] + } } ], "security": [ @@ -95,7 +105,8 @@ "referer": "https://twitter.com", "date": "2015-08-20T05:05:03+04:00", "userAgent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:47.0) Gecko/20100101 Firefox/47.0 Mozilla/5.0 (Macintosh; Intel Mac OS X x.y; rv:42.0) Gecko/20100101 Firefox/42.0", - "visitLocation": null + "visitLocation": null, + "potentialBot": false }, { "referer": "https://t.co", @@ -109,13 +120,15 @@ "longitude": -122.0946, "regionName": "California", "timezone": "America/Los_Angeles" - } + }, + "potentialBot": false }, { "referer": null, "date": "2015-08-20T05:05:03+04:00", "userAgent": "some_web_crawler/1.4", - "visitLocation": null + "visitLocation": null, + "potentialBot": true } ], "pagination": { diff --git a/docs/swagger/paths/v2_visits_orphan.json b/docs/swagger/paths/v2_visits_orphan.json index 683f40ec..ce52b197 100644 --- a/docs/swagger/paths/v2_visits_orphan.json +++ b/docs/swagger/paths/v2_visits_orphan.json @@ -45,6 +45,16 @@ "schema": { "type": "number" } + }, + { + "name": "excludeBots", + "in": "query", + "description": "Tells if visits from potential bots should be excluded from the result set", + "required": false, + "schema": { + "type": "string", + "enum": ["true"] + } } ], "security": [ @@ -87,6 +97,7 @@ "date": "2015-08-20T05:05:03+04:00", "userAgent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:47.0) Gecko/20100101 Firefox/47.0 Mozilla/5.0 (Macintosh; Intel Mac OS X x.y; rv:42.0) Gecko/20100101 Firefox/42.0", "visitLocation": null, + "potentialBot": false, "visitedUrl": "https://doma.in", "type": "base_url" }, @@ -103,6 +114,7 @@ "regionName": "California", "timezone": "America/Los_Angeles" }, + "potentialBot": false, "visitedUrl": "https://doma.in/foo", "type": "invalid_short_url" }, @@ -111,6 +123,7 @@ "date": "2015-08-20T05:05:03+04:00", "userAgent": "some_web_crawler/1.4", "visitLocation": null, + "potentialBot": true, "visitedUrl": "https://doma.in/foo/bar/baz", "type": "regular_404" } diff --git a/module/Core/config/entities-mappings/Shlinkio.Shlink.Core.Entity.Visit.php b/module/Core/config/entities-mappings/Shlinkio.Shlink.Core.Entity.Visit.php index efcccb65..8886e141 100644 --- a/module/Core/config/entities-mappings/Shlinkio.Shlink.Core.Entity.Visit.php +++ b/module/Core/config/entities-mappings/Shlinkio.Shlink.Core.Entity.Visit.php @@ -65,4 +65,9 @@ return static function (ClassMetadata $metadata, array $emConfig): void { ->columnName('type') ->length(255) ->build(); + + $builder->createField('potentialBot', Types::BOOLEAN) + ->columnName('potential_bot') + ->option('default', false) + ->build(); }; diff --git a/module/Core/functions/functions.php b/module/Core/functions/functions.php index 62df2070..867f7c7d 100644 --- a/module/Core/functions/functions.php +++ b/module/Core/functions/functions.php @@ -7,6 +7,7 @@ namespace Shlinkio\Shlink\Core; use Cake\Chronos\Chronos; use DateTimeInterface; use Fig\Http\Message\StatusCodeInterface; +use Jaybizzle\CrawlerDetect\CrawlerDetect; use Laminas\InputFilter\InputFilter; use PUGX\Shortid\Factory as ShortIdFactory; use Shlinkio\Shlink\Common\Util\DateRange; @@ -128,3 +129,13 @@ function kebabCaseToCamelCase(string $name): string { return lcfirst(str_replace(' ', '', ucwords(str_replace('-', ' ', $name)))); } + +function isCrawler(string $userAgent): bool +{ + static $detector; + if ($detector === null) { + $detector = new CrawlerDetect(); + } + + return $detector->isCrawler($userAgent); +} diff --git a/module/Core/src/Entity/Visit.php b/module/Core/src/Entity/Visit.php index 98d1a4c5..358bedde 100644 --- a/module/Core/src/Entity/Visit.php +++ b/module/Core/src/Entity/Visit.php @@ -13,6 +13,8 @@ use Shlinkio\Shlink\Core\Model\Visitor; use Shlinkio\Shlink\Core\Visit\Model\VisitLocationInterface; use Shlinkio\Shlink\Importer\Model\ImportedShlinkVisit; +use function Shlinkio\Shlink\Core\isCrawler; + class Visit extends AbstractEntity implements JsonSerializable { public const TYPE_VALID_SHORT_URL = 'valid_short_url'; @@ -29,6 +31,7 @@ class Visit extends AbstractEntity implements JsonSerializable private string $type; private ?ShortUrl $shortUrl; private ?VisitLocation $visitLocation = null; + private bool $potentialBot; private function __construct(?ShortUrl $shortUrl, string $type) { @@ -49,6 +52,7 @@ class Visit extends AbstractEntity implements JsonSerializable { $instance = new self($shortUrl, self::TYPE_IMPORTED); $instance->userAgent = $importedVisit->userAgent(); + $instance->potentialBot = isCrawler($instance->userAgent); $instance->referer = $importedVisit->referer(); $instance->date = Chronos::instance($importedVisit->date()); @@ -88,6 +92,7 @@ class Visit extends AbstractEntity implements JsonSerializable $this->referer = $visitor->getReferer(); $this->remoteAddr = $this->processAddress($anonymize, $visitor->getRemoteAddress()); $this->visitedUrl = $visitor->getVisitedUrl(); + $this->potentialBot = $visitor->isPotentialBot(); } private function processAddress(bool $anonymize, ?string $address): ?string @@ -166,6 +171,7 @@ class Visit extends AbstractEntity implements JsonSerializable 'date' => $this->date->toAtomString(), 'userAgent' => $this->userAgent, 'visitLocation' => $this->visitLocation, + 'potentialBot' => $this->potentialBot, ]; } diff --git a/module/Core/src/Model/Visitor.php b/module/Core/src/Model/Visitor.php index 9564a41c..b73ed68a 100644 --- a/module/Core/src/Model/Visitor.php +++ b/module/Core/src/Model/Visitor.php @@ -8,6 +8,7 @@ use Psr\Http\Message\ServerRequestInterface; use Shlinkio\Shlink\Common\Middleware\IpAddressMiddlewareFactory; use Shlinkio\Shlink\Core\Options\TrackingOptions; +use function Shlinkio\Shlink\Core\isCrawler; use function substr; final class Visitor @@ -21,6 +22,7 @@ final class Visitor private string $referer; private string $visitedUrl; private ?string $remoteAddress; + private bool $potentialBot; public function __construct(string $userAgent, string $referer, ?string $remoteAddress, string $visitedUrl) { @@ -28,6 +30,7 @@ final class Visitor $this->referer = $this->cropToLength($referer, self::REFERER_MAX_LENGTH); $this->visitedUrl = $this->cropToLength($visitedUrl, self::VISITED_URL_MAX_LENGTH); $this->remoteAddress = $this->cropToLength($remoteAddress, self::REMOTE_ADDRESS_MAX_LENGTH); + $this->potentialBot = isCrawler($userAgent); } private function cropToLength(?string $value, int $length): ?string @@ -50,6 +53,11 @@ final class Visitor return new self('', '', null, ''); } + public static function botInstance(): self + { + return new self('cf-facebook', '', null, ''); + } + public function getUserAgent(): string { return $this->userAgent; @@ -70,14 +78,22 @@ final class Visitor return $this->visitedUrl; } + public function isPotentialBot(): bool + { + return $this->potentialBot; + } + public function normalizeForTrackingOptions(TrackingOptions $options): self { - $instance = self::emptyInstance(); + $instance = new self( + $options->disableUaTracking() ? '' : $this->userAgent, + $options->disableReferrerTracking() ? '' : $this->referer, + $options->disableIpTracking() ? null : $this->remoteAddress, + $this->visitedUrl, + ); - $instance->userAgent = $options->disableUaTracking() ? '' : $this->userAgent; - $instance->referer = $options->disableReferrerTracking() ? '' : $this->referer; - $instance->remoteAddress = $options->disableIpTracking() ? null : $this->remoteAddress; - $instance->visitedUrl = $this->visitedUrl; + // Keep the fact that the visit was a potential bot, even if we no longer save the user agent + $instance->potentialBot = $this->potentialBot; return $instance; } diff --git a/module/Core/src/Model/VisitsParams.php b/module/Core/src/Model/VisitsParams.php index b579239b..f8498c7a 100644 --- a/module/Core/src/Model/VisitsParams.php +++ b/module/Core/src/Model/VisitsParams.php @@ -16,12 +16,18 @@ final class VisitsParams private ?DateRange $dateRange; private int $page; private int $itemsPerPage; + private bool $excludeBots; - public function __construct(?DateRange $dateRange = null, int $page = self::FIRST_PAGE, ?int $itemsPerPage = null) - { + public function __construct( + ?DateRange $dateRange = null, + int $page = self::FIRST_PAGE, + ?int $itemsPerPage = null, + bool $excludeBots = false + ) { $this->dateRange = $dateRange ?? new DateRange(); $this->page = $page; $this->itemsPerPage = $this->determineItemsPerPage($itemsPerPage); + $this->excludeBots = $excludeBots; } private function determineItemsPerPage(?int $itemsPerPage): int @@ -39,6 +45,7 @@ final class VisitsParams parseDateRangeFromQuery($query, 'startDate', 'endDate'), (int) ($query['page'] ?? 1), isset($query['itemsPerPage']) ? (int) $query['itemsPerPage'] : null, + isset($query['excludeBots']), ); } @@ -56,4 +63,9 @@ final class VisitsParams { return $this->itemsPerPage; } + + public function excludeBots(): bool + { + return $this->excludeBots; + } } diff --git a/module/Core/src/Paginator/Adapter/OrphanVisitsPaginatorAdapter.php b/module/Core/src/Paginator/Adapter/OrphanVisitsPaginatorAdapter.php index 7167b9e7..d7361fb3 100644 --- a/module/Core/src/Paginator/Adapter/OrphanVisitsPaginatorAdapter.php +++ b/module/Core/src/Paginator/Adapter/OrphanVisitsPaginatorAdapter.php @@ -6,6 +6,8 @@ namespace Shlinkio\Shlink\Core\Paginator\Adapter; use Shlinkio\Shlink\Core\Model\VisitsParams; use Shlinkio\Shlink\Core\Repository\VisitRepositoryInterface; +use Shlinkio\Shlink\Core\Visit\Persistence\VisitsCountFiltering; +use Shlinkio\Shlink\Core\Visit\Persistence\VisitsListFiltering; class OrphanVisitsPaginatorAdapter extends AbstractCacheableCountPaginatorAdapter { @@ -20,11 +22,20 @@ class OrphanVisitsPaginatorAdapter extends AbstractCacheableCountPaginatorAdapte protected function doCount(): int { - return $this->repo->countOrphanVisits($this->params->getDateRange()); + return $this->repo->countOrphanVisits(new VisitsCountFiltering( + $this->params->getDateRange(), + $this->params->excludeBots(), + )); } public function getSlice($offset, $length): iterable // phpcs:ignore { - return $this->repo->findOrphanVisits($this->params->getDateRange(), $length, $offset); + return $this->repo->findOrphanVisits(new VisitsListFiltering( + $this->params->getDateRange(), + $this->params->excludeBots(), + null, + $length, + $offset, + )); } } diff --git a/module/Core/src/Paginator/Adapter/VisitsForTagPaginatorAdapter.php b/module/Core/src/Paginator/Adapter/VisitsForTagPaginatorAdapter.php index 4c4e718b..d7c0580f 100644 --- a/module/Core/src/Paginator/Adapter/VisitsForTagPaginatorAdapter.php +++ b/module/Core/src/Paginator/Adapter/VisitsForTagPaginatorAdapter.php @@ -7,6 +7,8 @@ namespace Shlinkio\Shlink\Core\Paginator\Adapter; use Happyr\DoctrineSpecification\Specification\Specification; use Shlinkio\Shlink\Core\Model\VisitsParams; use Shlinkio\Shlink\Core\Repository\VisitRepositoryInterface; +use Shlinkio\Shlink\Core\Visit\Persistence\VisitsCountFiltering; +use Shlinkio\Shlink\Core\Visit\Persistence\VisitsListFiltering; use Shlinkio\Shlink\Rest\Entity\ApiKey; class VisitsForTagPaginatorAdapter extends AbstractCacheableCountPaginatorAdapter @@ -32,10 +34,13 @@ class VisitsForTagPaginatorAdapter extends AbstractCacheableCountPaginatorAdapte { return $this->visitRepository->findVisitsByTag( $this->tag, - $this->params->getDateRange(), - $length, - $offset, - $this->resolveSpec(), + new VisitsListFiltering( + $this->params->getDateRange(), + $this->params->excludeBots(), + $this->resolveSpec(), + $length, + $offset, + ), ); } @@ -43,8 +48,11 @@ class VisitsForTagPaginatorAdapter extends AbstractCacheableCountPaginatorAdapte { return $this->visitRepository->countVisitsByTag( $this->tag, - $this->params->getDateRange(), - $this->resolveSpec(), + new VisitsCountFiltering( + $this->params->getDateRange(), + $this->params->excludeBots(), + $this->resolveSpec(), + ), ); } diff --git a/module/Core/src/Paginator/Adapter/VisitsPaginatorAdapter.php b/module/Core/src/Paginator/Adapter/VisitsPaginatorAdapter.php index 02ba37b3..5369bd05 100644 --- a/module/Core/src/Paginator/Adapter/VisitsPaginatorAdapter.php +++ b/module/Core/src/Paginator/Adapter/VisitsPaginatorAdapter.php @@ -8,6 +8,8 @@ use Happyr\DoctrineSpecification\Specification\Specification; use Shlinkio\Shlink\Core\Model\ShortUrlIdentifier; use Shlinkio\Shlink\Core\Model\VisitsParams; use Shlinkio\Shlink\Core\Repository\VisitRepositoryInterface; +use Shlinkio\Shlink\Core\Visit\Persistence\VisitsCountFiltering; +use Shlinkio\Shlink\Core\Visit\Persistence\VisitsListFiltering; class VisitsPaginatorAdapter extends AbstractCacheableCountPaginatorAdapter { @@ -33,10 +35,13 @@ class VisitsPaginatorAdapter extends AbstractCacheableCountPaginatorAdapter return $this->visitRepository->findVisitsByShortCode( $this->identifier->shortCode(), $this->identifier->domain(), - $this->params->getDateRange(), - $length, - $offset, - $this->spec, + new VisitsListFiltering( + $this->params->getDateRange(), + $this->params->excludeBots(), + $this->spec, + $length, + $offset, + ), ); } @@ -45,8 +50,11 @@ class VisitsPaginatorAdapter extends AbstractCacheableCountPaginatorAdapter return $this->visitRepository->countVisitsByShortCode( $this->identifier->shortCode(), $this->identifier->domain(), - $this->params->getDateRange(), - $this->spec, + new VisitsCountFiltering( + $this->params->getDateRange(), + $this->params->excludeBots(), + $this->spec, + ), ); } } diff --git a/module/Core/src/Repository/VisitRepository.php b/module/Core/src/Repository/VisitRepository.php index 35d6a535..bc3f958b 100644 --- a/module/Core/src/Repository/VisitRepository.php +++ b/module/Core/src/Repository/VisitRepository.php @@ -7,11 +7,12 @@ namespace Shlinkio\Shlink\Core\Repository; use Doctrine\ORM\Query\ResultSetMappingBuilder; use Doctrine\ORM\QueryBuilder; use Happyr\DoctrineSpecification\Repository\EntitySpecificationRepository; -use Happyr\DoctrineSpecification\Specification\Specification; use Shlinkio\Shlink\Common\Util\DateRange; use Shlinkio\Shlink\Core\Entity\ShortUrl; use Shlinkio\Shlink\Core\Entity\Visit; use Shlinkio\Shlink\Core\Entity\VisitLocation; +use Shlinkio\Shlink\Core\Visit\Persistence\VisitsCountFiltering; +use Shlinkio\Shlink\Core\Visit\Persistence\VisitsListFiltering; use Shlinkio\Shlink\Core\Visit\Spec\CountOfOrphanVisits; use Shlinkio\Shlink\Core\Visit\Spec\CountOfShortUrlVisits; use Shlinkio\Shlink\Rest\Entity\ApiKey; @@ -83,25 +84,15 @@ class VisitRepository extends EntitySpecificationRepository implements VisitRepo /** * @return Visit[] */ - public function findVisitsByShortCode( - string $shortCode, - ?string $domain = null, - ?DateRange $dateRange = null, - ?int $limit = null, - ?int $offset = null, - ?Specification $spec = null - ): array { - $qb = $this->createVisitsByShortCodeQueryBuilder($shortCode, $domain, $dateRange, $spec); - return $this->resolveVisitsWithNativeQuery($qb, $limit, $offset); + public function findVisitsByShortCode(string $shortCode, ?string $domain, VisitsListFiltering $filtering): array + { + $qb = $this->createVisitsByShortCodeQueryBuilder($shortCode, $domain, $filtering); + return $this->resolveVisitsWithNativeQuery($qb, $filtering->limit(), $filtering->offset()); } - public function countVisitsByShortCode( - string $shortCode, - ?string $domain = null, - ?DateRange $dateRange = null, - ?Specification $spec = null - ): int { - $qb = $this->createVisitsByShortCodeQueryBuilder($shortCode, $domain, $dateRange, $spec); + public function countVisitsByShortCode(string $shortCode, ?string $domain, VisitsCountFiltering $filtering): int + { + $qb = $this->createVisitsByShortCodeQueryBuilder($shortCode, $domain, $filtering); $qb->select('COUNT(v.id)'); return (int) $qb->getQuery()->getSingleScalarResult(); @@ -110,12 +101,11 @@ class VisitRepository extends EntitySpecificationRepository implements VisitRepo private function createVisitsByShortCodeQueryBuilder( string $shortCode, ?string $domain, - ?DateRange $dateRange, - ?Specification $spec = null + VisitsCountFiltering $filtering ): QueryBuilder { /** @var ShortUrlRepositoryInterface $shortUrlRepo */ $shortUrlRepo = $this->getEntityManager()->getRepository(ShortUrl::class); - $shortUrl = $shortUrlRepo->findOne($shortCode, $domain, $spec); + $shortUrl = $shortUrlRepo->findOne($shortCode, $domain, $filtering->spec()); $shortUrlId = $shortUrl !== null ? $shortUrl->getId() : -1; // Parameters in this query need to be part of the query itself, as we need to use it a sub-query later @@ -124,36 +114,32 @@ class VisitRepository extends EntitySpecificationRepository implements VisitRepo $qb->from(Visit::class, 'v') ->where($qb->expr()->eq('v.shortUrl', $shortUrlId)); + if ($filtering->excludeBots()) { + $qb->andWhere($qb->expr()->eq('v.potentialBot', 'false')); + } + // Apply date range filtering - $this->applyDatesInline($qb, $dateRange); + $this->applyDatesInline($qb, $filtering->dateRange()); return $qb; } - public function findVisitsByTag( - string $tag, - ?DateRange $dateRange = null, - ?int $limit = null, - ?int $offset = null, - ?Specification $spec = null - ): array { - $qb = $this->createVisitsByTagQueryBuilder($tag, $dateRange, $spec); - return $this->resolveVisitsWithNativeQuery($qb, $limit, $offset); + public function findVisitsByTag(string $tag, VisitsListFiltering $filtering): array + { + $qb = $this->createVisitsByTagQueryBuilder($tag, $filtering); + return $this->resolveVisitsWithNativeQuery($qb, $filtering->limit(), $filtering->offset()); } - public function countVisitsByTag(string $tag, ?DateRange $dateRange = null, ?Specification $spec = null): int + public function countVisitsByTag(string $tag, VisitsCountFiltering $filtering): int { - $qb = $this->createVisitsByTagQueryBuilder($tag, $dateRange, $spec); + $qb = $this->createVisitsByTagQueryBuilder($tag, $filtering); $qb->select('COUNT(v.id)'); return (int) $qb->getQuery()->getSingleScalarResult(); } - private function createVisitsByTagQueryBuilder( - string $tag, - ?DateRange $dateRange, - ?Specification $spec - ): QueryBuilder { + private function createVisitsByTagQueryBuilder(string $tag, VisitsCountFiltering $filtering): QueryBuilder + { // Parameters in this query need to be inlined, not bound, as we need to use it as sub-query later // Since they are not strictly provided by the caller, it's reasonably safe $qb = $this->getEntityManager()->createQueryBuilder(); @@ -162,13 +148,17 @@ class VisitRepository extends EntitySpecificationRepository implements VisitRepo ->join('s.tags', 't') ->where($qb->expr()->eq('t.name', '\'' . $tag . '\'')); // This needs to be concatenated, not bound - $this->applyDatesInline($qb, $dateRange); - $this->applySpecification($qb, $spec, 'v'); + if ($filtering->excludeBots()) { + $qb->andWhere($qb->expr()->eq('v.potentialBot', 'false')); + } + + $this->applyDatesInline($qb, $filtering->dateRange()); + $this->applySpecification($qb, $filtering->spec(), 'v'); return $qb; } - public function findOrphanVisits(?DateRange $dateRange = null, ?int $limit = null, ?int $offset = null): array + public function findOrphanVisits(VisitsListFiltering $filtering): array { // Parameters in this query need to be inlined, not bound, as we need to use it as sub-query later // Since they are not strictly provided by the caller, it's reasonably safe @@ -176,14 +166,18 @@ class VisitRepository extends EntitySpecificationRepository implements VisitRepo $qb->from(Visit::class, 'v') ->where($qb->expr()->isNull('v.shortUrl')); - $this->applyDatesInline($qb, $dateRange); + if ($filtering->excludeBots()) { + $qb->andWhere($qb->expr()->eq('v.potentialBot', 'false')); + } - return $this->resolveVisitsWithNativeQuery($qb, $limit, $offset); + $this->applyDatesInline($qb, $filtering->dateRange()); + + return $this->resolveVisitsWithNativeQuery($qb, $filtering->limit(), $filtering->offset()); } - public function countOrphanVisits(?DateRange $dateRange = null): int + public function countOrphanVisits(VisitsCountFiltering $filtering): int { - return (int) $this->matchSingleScalarResult(new CountOfOrphanVisits($dateRange)); + return (int) $this->matchSingleScalarResult(new CountOfOrphanVisits($filtering)); } public function countVisits(?ApiKey $apiKey = null): int diff --git a/module/Core/src/Repository/VisitRepositoryInterface.php b/module/Core/src/Repository/VisitRepositoryInterface.php index 96fb21ee..05a59720 100644 --- a/module/Core/src/Repository/VisitRepositoryInterface.php +++ b/module/Core/src/Repository/VisitRepositoryInterface.php @@ -6,9 +6,9 @@ namespace Shlinkio\Shlink\Core\Repository; use Doctrine\Persistence\ObjectRepository; use Happyr\DoctrineSpecification\Repository\EntitySpecificationRepositoryInterface; -use Happyr\DoctrineSpecification\Specification\Specification; -use Shlinkio\Shlink\Common\Util\DateRange; use Shlinkio\Shlink\Core\Entity\Visit; +use Shlinkio\Shlink\Core\Visit\Persistence\VisitsCountFiltering; +use Shlinkio\Shlink\Core\Visit\Persistence\VisitsListFiltering; use Shlinkio\Shlink\Rest\Entity\ApiKey; interface VisitRepositoryInterface extends ObjectRepository, EntitySpecificationRepositoryInterface @@ -33,41 +33,23 @@ interface VisitRepositoryInterface extends ObjectRepository, EntitySpecification /** * @return Visit[] */ - public function findVisitsByShortCode( - string $shortCode, - ?string $domain = null, - ?DateRange $dateRange = null, - ?int $limit = null, - ?int $offset = null, - ?Specification $spec = null - ): array; + public function findVisitsByShortCode(string $shortCode, ?string $domain, VisitsListFiltering $filtering): array; - public function countVisitsByShortCode( - string $shortCode, - ?string $domain = null, - ?DateRange $dateRange = null, - ?Specification $spec = null - ): int; + public function countVisitsByShortCode(string $shortCode, ?string $domain, VisitsCountFiltering $filtering): int; /** * @return Visit[] */ - public function findVisitsByTag( - string $tag, - ?DateRange $dateRange = null, - ?int $limit = null, - ?int $offset = null, - ?Specification $spec = null - ): array; + public function findVisitsByTag(string $tag, VisitsListFiltering $filtering): array; - public function countVisitsByTag(string $tag, ?DateRange $dateRange = null, ?Specification $spec = null): int; + public function countVisitsByTag(string $tag, VisitsCountFiltering $filtering): int; /** * @return Visit[] */ - public function findOrphanVisits(?DateRange $dateRange = null, ?int $limit = null, ?int $offset = null): array; + public function findOrphanVisits(VisitsListFiltering $filtering): array; - public function countOrphanVisits(?DateRange $dateRange = null): int; + public function countOrphanVisits(VisitsCountFiltering $filtering): int; public function countVisits(?ApiKey $apiKey = null): int; } diff --git a/module/Core/src/Visit/Persistence/VisitsCountFiltering.php b/module/Core/src/Visit/Persistence/VisitsCountFiltering.php new file mode 100644 index 00000000..bc9ac5de --- /dev/null +++ b/module/Core/src/Visit/Persistence/VisitsCountFiltering.php @@ -0,0 +1,37 @@ +dateRange = $dateRange; + $this->excludeBots = $excludeBots; + $this->spec = $spec; + } + + public function dateRange(): ?DateRange + { + return $this->dateRange; + } + + public function excludeBots(): bool + { + return $this->excludeBots; + } + + public function spec(): ?Specification + { + return $this->spec; + } +} diff --git a/module/Core/src/Visit/Persistence/VisitsListFiltering.php b/module/Core/src/Visit/Persistence/VisitsListFiltering.php new file mode 100644 index 00000000..4f67967d --- /dev/null +++ b/module/Core/src/Visit/Persistence/VisitsListFiltering.php @@ -0,0 +1,36 @@ +limit = $limit; + $this->offset = $offset; + } + + public function limit(): ?int + { + return $this->limit; + } + + public function offset(): ?int + { + return $this->offset; + } +} diff --git a/module/Core/src/Visit/Spec/CountOfOrphanVisits.php b/module/Core/src/Visit/Spec/CountOfOrphanVisits.php index 97712944..b2cc9efd 100644 --- a/module/Core/src/Visit/Spec/CountOfOrphanVisits.php +++ b/module/Core/src/Visit/Spec/CountOfOrphanVisits.php @@ -7,24 +7,30 @@ namespace Shlinkio\Shlink\Core\Visit\Spec; use Happyr\DoctrineSpecification\Spec; use Happyr\DoctrineSpecification\Specification\BaseSpecification; use Happyr\DoctrineSpecification\Specification\Specification; -use Shlinkio\Shlink\Common\Util\DateRange; use Shlinkio\Shlink\Core\Spec\InDateRange; +use Shlinkio\Shlink\Core\Visit\Persistence\VisitsCountFiltering; class CountOfOrphanVisits extends BaseSpecification { - private ?DateRange $dateRange; + private VisitsCountFiltering $filtering; - public function __construct(?DateRange $dateRange) + public function __construct(VisitsCountFiltering $filtering) { parent::__construct(); - $this->dateRange = $dateRange; + $this->filtering = $filtering; } protected function getSpec(): Specification { - return Spec::countOf(Spec::andX( + $conditions = [ Spec::isNull('shortUrl'), - new InDateRange($this->dateRange), - )); + new InDateRange($this->filtering->dateRange()), + ]; + + if ($this->filtering->excludeBots()) { + $conditions[] = Spec::eq('potentialBot', false); + } + + return Spec::countOf(Spec::andX(...$conditions)); } } diff --git a/module/Core/src/Visit/VisitsStatsHelper.php b/module/Core/src/Visit/VisitsStatsHelper.php index 61d879fd..1aa03c46 100644 --- a/module/Core/src/Visit/VisitsStatsHelper.php +++ b/module/Core/src/Visit/VisitsStatsHelper.php @@ -22,6 +22,7 @@ use Shlinkio\Shlink\Core\Repository\TagRepository; use Shlinkio\Shlink\Core\Repository\VisitRepository; use Shlinkio\Shlink\Core\Repository\VisitRepositoryInterface; use Shlinkio\Shlink\Core\Visit\Model\VisitsStats; +use Shlinkio\Shlink\Core\Visit\Persistence\VisitsCountFiltering; use Shlinkio\Shlink\Rest\Entity\ApiKey; class VisitsStatsHelper implements VisitsStatsHelperInterface @@ -38,7 +39,10 @@ class VisitsStatsHelper implements VisitsStatsHelperInterface /** @var VisitRepository $visitsRepo */ $visitsRepo = $this->em->getRepository(Visit::class); - return new VisitsStats($visitsRepo->countVisits($apiKey), $visitsRepo->countOrphanVisits()); + return new VisitsStats( + $visitsRepo->countVisits($apiKey), + $visitsRepo->countOrphanVisits(new VisitsCountFiltering()), + ); } /** diff --git a/module/Core/test-db/Repository/VisitRepositoryTest.php b/module/Core/test-db/Repository/VisitRepositoryTest.php index 27ab3252..f77d7fd0 100644 --- a/module/Core/test-db/Repository/VisitRepositoryTest.php +++ b/module/Core/test-db/Repository/VisitRepositoryTest.php @@ -15,6 +15,8 @@ use Shlinkio\Shlink\Core\Model\ShortUrlMeta; use Shlinkio\Shlink\Core\Model\Visitor; use Shlinkio\Shlink\Core\Repository\VisitRepository; use Shlinkio\Shlink\Core\ShortUrl\Resolver\PersistenceShortUrlRelationResolver; +use Shlinkio\Shlink\Core\Visit\Persistence\VisitsCountFiltering; +use Shlinkio\Shlink\Core\Visit\Persistence\VisitsListFiltering; use Shlinkio\Shlink\IpGeolocation\Model\Location; use Shlinkio\Shlink\Rest\ApiKey\Model\ApiKeyMeta; use Shlinkio\Shlink\Rest\ApiKey\Model\RoleDefinition; @@ -87,22 +89,34 @@ class VisitRepositoryTest extends DatabaseTestCase { [$shortCode, $domain] = $this->createShortUrlsAndVisits(); - self::assertCount(0, $this->repo->findVisitsByShortCode('invalid')); - self::assertCount(6, $this->repo->findVisitsByShortCode($shortCode)); - self::assertCount(3, $this->repo->findVisitsByShortCode($shortCode, $domain)); - self::assertCount(2, $this->repo->findVisitsByShortCode($shortCode, null, new DateRange( - Chronos::parse('2016-01-02'), - Chronos::parse('2016-01-03'), + self::assertCount(0, $this->repo->findVisitsByShortCode('invalid', null, new VisitsListFiltering())); + self::assertCount(6, $this->repo->findVisitsByShortCode($shortCode, null, new VisitsListFiltering())); + self::assertCount(4, $this->repo->findVisitsByShortCode($shortCode, null, new VisitsListFiltering(null, true))); + self::assertCount(3, $this->repo->findVisitsByShortCode($shortCode, $domain, new VisitsListFiltering())); + self::assertCount(2, $this->repo->findVisitsByShortCode($shortCode, null, new VisitsListFiltering( + DateRange::withStartAndEndDate(Chronos::parse('2016-01-02'), Chronos::parse('2016-01-03')), ))); - self::assertCount(4, $this->repo->findVisitsByShortCode($shortCode, null, new DateRange( - Chronos::parse('2016-01-03'), + self::assertCount(4, $this->repo->findVisitsByShortCode($shortCode, null, new VisitsListFiltering( + DateRange::withStartDate(Chronos::parse('2016-01-03')), ))); - self::assertCount(1, $this->repo->findVisitsByShortCode($shortCode, $domain, new DateRange( - Chronos::parse('2016-01-03'), + self::assertCount(1, $this->repo->findVisitsByShortCode($shortCode, $domain, new VisitsListFiltering( + DateRange::withStartDate(Chronos::parse('2016-01-03')), ))); - self::assertCount(3, $this->repo->findVisitsByShortCode($shortCode, null, null, 3, 2)); - self::assertCount(2, $this->repo->findVisitsByShortCode($shortCode, null, null, 5, 4)); - self::assertCount(1, $this->repo->findVisitsByShortCode($shortCode, $domain, null, 3, 2)); + self::assertCount(3, $this->repo->findVisitsByShortCode( + $shortCode, + null, + new VisitsListFiltering(null, false, null, 3, 2), + )); + self::assertCount(2, $this->repo->findVisitsByShortCode( + $shortCode, + null, + new VisitsListFiltering(null, false, null, 5, 4), + )); + self::assertCount(1, $this->repo->findVisitsByShortCode( + $shortCode, + $domain, + new VisitsListFiltering(null, false, null, 3, 2), + )); } /** @test */ @@ -110,18 +124,21 @@ class VisitRepositoryTest extends DatabaseTestCase { [$shortCode, $domain] = $this->createShortUrlsAndVisits(); - self::assertEquals(0, $this->repo->countVisitsByShortCode('invalid')); - self::assertEquals(6, $this->repo->countVisitsByShortCode($shortCode)); - self::assertEquals(3, $this->repo->countVisitsByShortCode($shortCode, $domain)); - self::assertEquals(2, $this->repo->countVisitsByShortCode($shortCode, null, new DateRange( - Chronos::parse('2016-01-02'), - Chronos::parse('2016-01-03'), + self::assertEquals(0, $this->repo->countVisitsByShortCode('invalid', null, new VisitsCountFiltering())); + self::assertEquals(6, $this->repo->countVisitsByShortCode($shortCode, null, new VisitsCountFiltering())); + self::assertEquals(4, $this->repo->countVisitsByShortCode($shortCode, null, new VisitsCountFiltering( + null, + true, ))); - self::assertEquals(4, $this->repo->countVisitsByShortCode($shortCode, null, new DateRange( - Chronos::parse('2016-01-03'), + self::assertEquals(3, $this->repo->countVisitsByShortCode($shortCode, $domain, new VisitsCountFiltering())); + self::assertEquals(2, $this->repo->countVisitsByShortCode($shortCode, null, new VisitsCountFiltering( + DateRange::withStartAndEndDate(Chronos::parse('2016-01-02'), Chronos::parse('2016-01-03')), ))); - self::assertEquals(1, $this->repo->countVisitsByShortCode($shortCode, $domain, new DateRange( - Chronos::parse('2016-01-03'), + self::assertEquals(4, $this->repo->countVisitsByShortCode($shortCode, null, new VisitsCountFiltering( + DateRange::withStartDate(Chronos::parse('2016-01-03')), + ))); + self::assertEquals(1, $this->repo->countVisitsByShortCode($shortCode, $domain, new VisitsCountFiltering( + DateRange::withStartDate(Chronos::parse('2016-01-03')), ))); } @@ -140,13 +157,15 @@ class VisitRepositoryTest extends DatabaseTestCase $this->createShortUrlsAndVisits(false, [$foo]); $this->getEntityManager()->flush(); - self::assertCount(0, $this->repo->findVisitsByTag('invalid')); - self::assertCount(18, $this->repo->findVisitsByTag($foo)); - self::assertCount(6, $this->repo->findVisitsByTag($foo, new DateRange( - Chronos::parse('2016-01-02'), - Chronos::parse('2016-01-03'), + self::assertCount(0, $this->repo->findVisitsByTag('invalid', new VisitsListFiltering())); + self::assertCount(18, $this->repo->findVisitsByTag($foo, new VisitsListFiltering())); + self::assertCount(12, $this->repo->findVisitsByTag($foo, new VisitsListFiltering(null, true))); + self::assertCount(6, $this->repo->findVisitsByTag($foo, new VisitsListFiltering( + DateRange::withStartAndEndDate(Chronos::parse('2016-01-02'), Chronos::parse('2016-01-03')), + ))); + self::assertCount(12, $this->repo->findVisitsByTag($foo, new VisitsListFiltering( + DateRange::withStartDate(Chronos::parse('2016-01-03')), ))); - self::assertCount(12, $this->repo->findVisitsByTag($foo, new DateRange(Chronos::parse('2016-01-03')))); } /** @test */ @@ -160,13 +179,15 @@ class VisitRepositoryTest extends DatabaseTestCase $this->createShortUrlsAndVisits(false, [$foo]); $this->getEntityManager()->flush(); - self::assertEquals(0, $this->repo->countVisitsByTag('invalid')); - self::assertEquals(12, $this->repo->countVisitsByTag($foo)); - self::assertEquals(4, $this->repo->countVisitsByTag($foo, new DateRange( - Chronos::parse('2016-01-02'), - Chronos::parse('2016-01-03'), + self::assertEquals(0, $this->repo->countVisitsByTag('invalid', new VisitsCountFiltering())); + self::assertEquals(12, $this->repo->countVisitsByTag($foo, new VisitsCountFiltering())); + self::assertEquals(8, $this->repo->countVisitsByTag($foo, new VisitsCountFiltering(null, true))); + self::assertEquals(4, $this->repo->countVisitsByTag($foo, new VisitsCountFiltering( + DateRange::withStartAndEndDate(Chronos::parse('2016-01-02'), Chronos::parse('2016-01-03')), + ))); + self::assertEquals(8, $this->repo->countVisitsByTag($foo, new VisitsCountFiltering( + DateRange::withStartDate(Chronos::parse('2016-01-03')), ))); - self::assertEquals(8, $this->repo->countVisitsByTag($foo, new DateRange(Chronos::parse('2016-01-03')))); } /** @test */ @@ -206,6 +227,7 @@ class VisitRepositoryTest extends DatabaseTestCase $this->getEntityManager()->persist(Visit::forBasePath(Visitor::emptyInstance())); $this->getEntityManager()->persist(Visit::forInvalidShortUrl(Visitor::emptyInstance())); $this->getEntityManager()->persist(Visit::forRegularNotFound(Visitor::emptyInstance())); + $this->getEntityManager()->persist(Visit::forRegularNotFound(Visitor::botInstance())); $this->getEntityManager()->flush(); @@ -213,7 +235,8 @@ class VisitRepositoryTest extends DatabaseTestCase self::assertEquals(4, $this->repo->countVisits($apiKey1)); self::assertEquals(5 + 7, $this->repo->countVisits($apiKey2)); self::assertEquals(4 + 7, $this->repo->countVisits($domainApiKey)); - self::assertEquals(3, $this->repo->countOrphanVisits()); + self::assertEquals(4, $this->repo->countOrphanVisits(new VisitsCountFiltering())); + self::assertEquals(3, $this->repo->countOrphanVisits(new VisitsCountFiltering(null, true))); } /** @test */ @@ -223,9 +246,10 @@ class VisitRepositoryTest extends DatabaseTestCase $this->getEntityManager()->persist($shortUrl); $this->createVisitsForShortUrl($shortUrl, 7); + $botsCount = 3; for ($i = 0; $i < 6; $i++) { $this->getEntityManager()->persist($this->setDateOnVisit( - Visit::forBasePath(Visitor::emptyInstance()), + Visit::forBasePath($botsCount < 1 ? Visitor::emptyInstance() : Visitor::botInstance()), Chronos::parse(sprintf('2020-01-0%s', $i + 1)), )); $this->getEntityManager()->persist($this->setDateOnVisit( @@ -236,20 +260,32 @@ class VisitRepositoryTest extends DatabaseTestCase Visit::forRegularNotFound(Visitor::emptyInstance()), Chronos::parse(sprintf('2020-01-0%s', $i + 1)), )); + + $botsCount--; } $this->getEntityManager()->flush(); - self::assertCount(18, $this->repo->findOrphanVisits()); - self::assertCount(5, $this->repo->findOrphanVisits(null, 5)); - self::assertCount(10, $this->repo->findOrphanVisits(null, 15, 8)); - self::assertCount(9, $this->repo->findOrphanVisits(DateRange::withStartDate(Chronos::parse('2020-01-04')), 15)); - self::assertCount(2, $this->repo->findOrphanVisits( + self::assertCount(18, $this->repo->findOrphanVisits(new VisitsListFiltering())); + self::assertCount(15, $this->repo->findOrphanVisits(new VisitsListFiltering(null, true))); + self::assertCount(5, $this->repo->findOrphanVisits(new VisitsListFiltering(null, false, null, 5))); + self::assertCount(10, $this->repo->findOrphanVisits(new VisitsListFiltering(null, false, null, 15, 8))); + self::assertCount(9, $this->repo->findOrphanVisits(new VisitsListFiltering( + DateRange::withStartDate(Chronos::parse('2020-01-04')), + false, + null, + 15, + ))); + self::assertCount(2, $this->repo->findOrphanVisits(new VisitsListFiltering( DateRange::withStartAndEndDate(Chronos::parse('2020-01-02'), Chronos::parse('2020-01-03')), + false, + null, 6, 4, - )); - self::assertCount(3, $this->repo->findOrphanVisits(DateRange::withEndDate(Chronos::parse('2020-01-01')))); + ))); + self::assertCount(3, $this->repo->findOrphanVisits(new VisitsListFiltering( + DateRange::withEndDate(Chronos::parse('2020-01-01')), + ))); } /** @test */ @@ -276,13 +312,17 @@ class VisitRepositoryTest extends DatabaseTestCase $this->getEntityManager()->flush(); - self::assertEquals(18, $this->repo->countOrphanVisits()); - self::assertEquals(18, $this->repo->countOrphanVisits(DateRange::emptyInstance())); - self::assertEquals(9, $this->repo->countOrphanVisits(DateRange::withStartDate(Chronos::parse('2020-01-04')))); - self::assertEquals(6, $this->repo->countOrphanVisits( - DateRange::withStartAndEndDate(Chronos::parse('2020-01-02'), Chronos::parse('2020-01-03')), + self::assertEquals(18, $this->repo->countOrphanVisits(new VisitsCountFiltering())); + self::assertEquals(18, $this->repo->countOrphanVisits(new VisitsCountFiltering(DateRange::emptyInstance()))); + self::assertEquals(9, $this->repo->countOrphanVisits( + new VisitsCountFiltering(DateRange::withStartDate(Chronos::parse('2020-01-04'))), + )); + self::assertEquals(6, $this->repo->countOrphanVisits(new VisitsCountFiltering( + DateRange::withStartAndEndDate(Chronos::parse('2020-01-02'), Chronos::parse('2020-01-03')), + ))); + self::assertEquals(3, $this->repo->countOrphanVisits( + new VisitsCountFiltering(DateRange::withEndDate(Chronos::parse('2020-01-01'))), )); - self::assertEquals(3, $this->repo->countOrphanVisits(DateRange::withEndDate(Chronos::parse('2020-01-01')))); } private function createShortUrlsAndVisits(bool $withDomain = true, array $tags = []): array @@ -311,13 +351,17 @@ class VisitRepositoryTest extends DatabaseTestCase return [$shortCode, $domain, $shortUrl]; } - private function createVisitsForShortUrl(ShortUrl $shortUrl, int $amount = 6): void + private function createVisitsForShortUrl(ShortUrl $shortUrl, int $amount = 6, int $botsAmount = 2): void { for ($i = 0; $i < $amount; $i++) { $visit = $this->setDateOnVisit( - Visit::forValidShortUrl($shortUrl, Visitor::emptyInstance()), + Visit::forValidShortUrl( + $shortUrl, + $botsAmount < 1 ? Visitor::emptyInstance() : Visitor::botInstance(), + ), Chronos::parse(sprintf('2016-01-0%s', $i + 1)), ); + $botsAmount--; $this->getEntityManager()->persist($visit); } diff --git a/module/Core/test/Entity/VisitTest.php b/module/Core/test/Entity/VisitTest.php index 7be3c3fc..2d2cb4f8 100644 --- a/module/Core/test/Entity/VisitTest.php +++ b/module/Core/test/Entity/VisitTest.php @@ -12,19 +12,35 @@ use Shlinkio\Shlink\Core\Model\Visitor; class VisitTest extends TestCase { - /** @test */ - public function isProperlyJsonSerialized(): void + /** + * @test + * @dataProvider provideUserAgents + */ + public function isProperlyJsonSerialized(string $userAgent, bool $expectedToBePotentialBot): void { - $visit = Visit::forValidShortUrl(ShortUrl::createEmpty(), new Visitor('Chrome', 'some site', '1.2.3.4', '')); + $visit = Visit::forValidShortUrl(ShortUrl::createEmpty(), new Visitor($userAgent, 'some site', '1.2.3.4', '')); self::assertEquals([ 'referer' => 'some site', 'date' => $visit->getDate()->toAtomString(), - 'userAgent' => 'Chrome', + 'userAgent' => $userAgent, 'visitLocation' => null, + 'potentialBot' => $expectedToBePotentialBot, ], $visit->jsonSerialize()); } + public function provideUserAgents(): iterable + { + yield 'Chrome' => [ + 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.36', + false, + ]; + yield 'Firefox' => ['Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:47.0) Gecko/20100101 Firefox/47.0', false]; + yield 'Facebook' => ['cf-facebook', true]; + yield 'Twitter' => ['IDG Twitter Links Resolver', true]; + yield 'Guzzle' => ['guzzlehttp', true]; + } + /** * @test * @dataProvider provideAddresses diff --git a/module/Core/test/Mercure/MercureUpdatesGeneratorTest.php b/module/Core/test/Mercure/MercureUpdatesGeneratorTest.php index 1d460623..86d1b3d5 100644 --- a/module/Core/test/Mercure/MercureUpdatesGeneratorTest.php +++ b/module/Core/test/Mercure/MercureUpdatesGeneratorTest.php @@ -66,6 +66,7 @@ class MercureUpdatesGeneratorTest extends TestCase 'userAgent' => '', 'visitLocation' => null, 'date' => $visit->getDate()->toAtomString(), + 'potentialBot' => false, ], ], json_decode($update->getData())); } @@ -91,6 +92,7 @@ class MercureUpdatesGeneratorTest extends TestCase 'userAgent' => '', 'visitLocation' => null, 'date' => $orphanVisit->getDate()->toAtomString(), + 'potentialBot' => false, 'visitedUrl' => $orphanVisit->visitedUrl(), 'type' => $orphanVisit->type(), ], diff --git a/module/Core/test/Paginator/Adapter/OrphanVisitsPaginatorAdapterTest.php b/module/Core/test/Paginator/Adapter/OrphanVisitsPaginatorAdapterTest.php index 6b28aa68..1cc21eef 100644 --- a/module/Core/test/Paginator/Adapter/OrphanVisitsPaginatorAdapterTest.php +++ b/module/Core/test/Paginator/Adapter/OrphanVisitsPaginatorAdapterTest.php @@ -12,6 +12,8 @@ use Shlinkio\Shlink\Core\Model\Visitor; use Shlinkio\Shlink\Core\Model\VisitsParams; use Shlinkio\Shlink\Core\Paginator\Adapter\OrphanVisitsPaginatorAdapter; use Shlinkio\Shlink\Core\Repository\VisitRepositoryInterface; +use Shlinkio\Shlink\Core\Visit\Persistence\VisitsCountFiltering; +use Shlinkio\Shlink\Core\Visit\Persistence\VisitsListFiltering; class OrphanVisitsPaginatorAdapterTest extends TestCase { @@ -32,7 +34,9 @@ class OrphanVisitsPaginatorAdapterTest extends TestCase public function countDelegatesToRepository(): void { $expectedCount = 5; - $repoCount = $this->repo->countOrphanVisits($this->params->getDateRange())->willReturn($expectedCount); + $repoCount = $this->repo->countOrphanVisits( + new VisitsCountFiltering($this->params->getDateRange()), + )->willReturn($expectedCount); $result = $this->adapter->getNbResults(); @@ -48,7 +52,9 @@ class OrphanVisitsPaginatorAdapterTest extends TestCase { $visitor = Visitor::emptyInstance(); $list = [Visit::forRegularNotFound($visitor), Visit::forInvalidShortUrl($visitor)]; - $repoFind = $this->repo->findOrphanVisits($this->params->getDateRange(), $limit, $offset)->willReturn($list); + $repoFind = $this->repo->findOrphanVisits( + new VisitsListFiltering($this->params->getDateRange(), $this->params->excludeBots(), null, $limit, $offset), + )->willReturn($list); $result = $this->adapter->getSlice($offset, $limit); diff --git a/module/Core/test/Paginator/Adapter/VisitsForTagPaginatorAdapterTest.php b/module/Core/test/Paginator/Adapter/VisitsForTagPaginatorAdapterTest.php index 6c95a60f..aa684b70 100644 --- a/module/Core/test/Paginator/Adapter/VisitsForTagPaginatorAdapterTest.php +++ b/module/Core/test/Paginator/Adapter/VisitsForTagPaginatorAdapterTest.php @@ -11,6 +11,8 @@ use Shlinkio\Shlink\Common\Util\DateRange; use Shlinkio\Shlink\Core\Model\VisitsParams; use Shlinkio\Shlink\Core\Paginator\Adapter\VisitsForTagPaginatorAdapter; use Shlinkio\Shlink\Core\Repository\VisitRepositoryInterface; +use Shlinkio\Shlink\Core\Visit\Persistence\VisitsCountFiltering; +use Shlinkio\Shlink\Core\Visit\Persistence\VisitsListFiltering; use Shlinkio\Shlink\Rest\Entity\ApiKey; class VisitsForTagPaginatorAdapterTest extends TestCase @@ -31,7 +33,10 @@ class VisitsForTagPaginatorAdapterTest extends TestCase $limit = 1; $offset = 5; $adapter = $this->createAdapter(null); - $findVisits = $this->repo->findVisitsByTag('foo', new DateRange(), $limit, $offset, null)->willReturn([]); + $findVisits = $this->repo->findVisitsByTag( + 'foo', + new VisitsListFiltering(new DateRange(), false, null, $limit, $offset), + )->willReturn([]); for ($i = 0; $i < $count; $i++) { $adapter->getSlice($offset, $limit); @@ -46,7 +51,10 @@ class VisitsForTagPaginatorAdapterTest extends TestCase $count = 3; $apiKey = ApiKey::create(); $adapter = $this->createAdapter($apiKey); - $countVisits = $this->repo->countVisitsByTag('foo', new DateRange(), $apiKey->spec())->willReturn(3); + $countVisits = $this->repo->countVisitsByTag( + 'foo', + new VisitsCountFiltering(DateRange::emptyInstance(), false, $apiKey->spec()), + )->willReturn(3); for ($i = 0; $i < $count; $i++) { $adapter->getNbResults(); diff --git a/module/Core/test/Paginator/Adapter/VisitsPaginatorAdapterTest.php b/module/Core/test/Paginator/Adapter/VisitsPaginatorAdapterTest.php index 955e5ac5..d7b454b0 100644 --- a/module/Core/test/Paginator/Adapter/VisitsPaginatorAdapterTest.php +++ b/module/Core/test/Paginator/Adapter/VisitsPaginatorAdapterTest.php @@ -12,6 +12,8 @@ use Shlinkio\Shlink\Core\Model\ShortUrlIdentifier; use Shlinkio\Shlink\Core\Model\VisitsParams; use Shlinkio\Shlink\Core\Paginator\Adapter\VisitsPaginatorAdapter; use Shlinkio\Shlink\Core\Repository\VisitRepositoryInterface; +use Shlinkio\Shlink\Core\Visit\Persistence\VisitsCountFiltering; +use Shlinkio\Shlink\Core\Visit\Persistence\VisitsListFiltering; use Shlinkio\Shlink\Rest\Entity\ApiKey; class VisitsPaginatorAdapterTest extends TestCase @@ -32,9 +34,11 @@ class VisitsPaginatorAdapterTest extends TestCase $limit = 1; $offset = 5; $adapter = $this->createAdapter(null); - $findVisits = $this->repo->findVisitsByShortCode('', null, new DateRange(), $limit, $offset, null)->willReturn( - [], - ); + $findVisits = $this->repo->findVisitsByShortCode( + '', + null, + new VisitsListFiltering(new DateRange(), false, null, $limit, $offset), + )->willReturn([]); for ($i = 0; $i < $count; $i++) { $adapter->getSlice($offset, $limit); @@ -49,7 +53,11 @@ class VisitsPaginatorAdapterTest extends TestCase $count = 3; $apiKey = ApiKey::create(); $adapter = $this->createAdapter($apiKey); - $countVisits = $this->repo->countVisitsByShortCode('', null, new DateRange(), $apiKey->spec())->willReturn(3); + $countVisits = $this->repo->countVisitsByShortCode( + '', + null, + new VisitsCountFiltering(new DateRange(), false, $apiKey->spec()), + )->willReturn(3); for ($i = 0; $i < $count; $i++) { $adapter->getNbResults(); diff --git a/module/Core/test/Visit/Transformer/OrphanVisitDataTransformerTest.php b/module/Core/test/Visit/Transformer/OrphanVisitDataTransformerTest.php index 61193c86..c836cd7c 100644 --- a/module/Core/test/Visit/Transformer/OrphanVisitDataTransformerTest.php +++ b/module/Core/test/Visit/Transformer/OrphanVisitDataTransformerTest.php @@ -42,6 +42,7 @@ class OrphanVisitDataTransformerTest extends TestCase 'date' => $visit->getDate()->toAtomString(), 'userAgent' => '', 'visitLocation' => null, + 'potentialBot' => false, 'visitedUrl' => '', 'type' => Visit::TYPE_BASE_URL, ], @@ -57,6 +58,7 @@ class OrphanVisitDataTransformerTest extends TestCase 'date' => $visit->getDate()->toAtomString(), 'userAgent' => 'foo', 'visitLocation' => null, + 'potentialBot' => false, 'visitedUrl' => 'https://example.com/foo', 'type' => Visit::TYPE_INVALID_SHORT_URL, ], @@ -74,6 +76,7 @@ class OrphanVisitDataTransformerTest extends TestCase 'date' => $visit->getDate()->toAtomString(), 'userAgent' => 'user-agent', 'visitLocation' => $location, + 'potentialBot' => false, 'visitedUrl' => 'https://doma.in/foo/bar', 'type' => Visit::TYPE_REGULAR_404, ], diff --git a/module/Core/test/Visit/VisitsStatsHelperTest.php b/module/Core/test/Visit/VisitsStatsHelperTest.php index 8e90a447..e6f067da 100644 --- a/module/Core/test/Visit/VisitsStatsHelperTest.php +++ b/module/Core/test/Visit/VisitsStatsHelperTest.php @@ -10,7 +10,6 @@ use PHPUnit\Framework\TestCase; use Prophecy\Argument; use Prophecy\PhpUnit\ProphecyTrait; use Prophecy\Prophecy\ObjectProphecy; -use Shlinkio\Shlink\Common\Util\DateRange; use Shlinkio\Shlink\Core\Entity\ShortUrl; use Shlinkio\Shlink\Core\Entity\Tag; use Shlinkio\Shlink\Core\Entity\Visit; @@ -23,6 +22,8 @@ use Shlinkio\Shlink\Core\Repository\ShortUrlRepositoryInterface; use Shlinkio\Shlink\Core\Repository\TagRepository; use Shlinkio\Shlink\Core\Repository\VisitRepository; use Shlinkio\Shlink\Core\Visit\Model\VisitsStats; +use Shlinkio\Shlink\Core\Visit\Persistence\VisitsCountFiltering; +use Shlinkio\Shlink\Core\Visit\Persistence\VisitsListFiltering; use Shlinkio\Shlink\Core\Visit\VisitsStatsHelper; use Shlinkio\Shlink\Rest\Entity\ApiKey; use ShlinkioTest\Shlink\Core\Util\ApiKeyHelpersTrait; @@ -53,7 +54,9 @@ class VisitsStatsHelperTest extends TestCase { $repo = $this->prophesize(VisitRepository::class); $count = $repo->countVisits(null)->willReturn($expectedCount * 3); - $countOrphan = $repo->countOrphanVisits()->willReturn($expectedCount); + $countOrphan = $repo->countOrphanVisits(Argument::type(VisitsCountFiltering::class))->willReturn( + $expectedCount, + ); $getRepo = $this->em->getRepository(Visit::class)->willReturn($repo->reveal()); $stats = $this->helper->getVisitsStats(); @@ -83,10 +86,8 @@ class VisitsStatsHelperTest extends TestCase $list = map(range(0, 1), fn () => Visit::forValidShortUrl(ShortUrl::createEmpty(), Visitor::emptyInstance())); $repo2 = $this->prophesize(VisitRepository::class); - $repo2->findVisitsByShortCode($shortCode, null, Argument::type(DateRange::class), 1, 0, $spec)->willReturn( - $list, - ); - $repo2->countVisitsByShortCode($shortCode, null, Argument::type(DateRange::class), $spec)->willReturn(1); + $repo2->findVisitsByShortCode($shortCode, null, Argument::type(VisitsListFiltering::class))->willReturn($list); + $repo2->countVisitsByShortCode($shortCode, null, Argument::type(VisitsCountFiltering::class))->willReturn(1); $this->em->getRepository(Visit::class)->willReturn($repo2->reveal())->shouldBeCalledOnce(); $paginator = $this->helper->visitsForShortUrl(new ShortUrlIdentifier($shortCode), new VisitsParams(), $apiKey); @@ -136,11 +137,10 @@ class VisitsStatsHelperTest extends TestCase $tagExists = $repo->tagExists($tag, $apiKey)->willReturn(true); $getRepo = $this->em->getRepository(Tag::class)->willReturn($repo->reveal()); - $spec = $apiKey === null ? null : $apiKey->spec(); $list = map(range(0, 1), fn () => Visit::forValidShortUrl(ShortUrl::createEmpty(), Visitor::emptyInstance())); $repo2 = $this->prophesize(VisitRepository::class); - $repo2->findVisitsByTag($tag, Argument::type(DateRange::class), 1, 0, $spec)->willReturn($list); - $repo2->countVisitsByTag($tag, Argument::type(DateRange::class), $spec)->willReturn(1); + $repo2->findVisitsByTag($tag, Argument::type(VisitsListFiltering::class))->willReturn($list); + $repo2->countVisitsByTag($tag, Argument::type(VisitsCountFiltering::class))->willReturn(1); $this->em->getRepository(Visit::class)->willReturn($repo2->reveal())->shouldBeCalledOnce(); $paginator = $this->helper->visitsForTag($tag, new VisitsParams(), $apiKey); @@ -155,8 +155,8 @@ class VisitsStatsHelperTest extends TestCase { $list = map(range(0, 3), fn () => Visit::forBasePath(Visitor::emptyInstance())); $repo = $this->prophesize(VisitRepository::class); - $countVisits = $repo->countOrphanVisits(Argument::type(DateRange::class))->willReturn(count($list)); - $listVisits = $repo->findOrphanVisits(Argument::type(DateRange::class), Argument::cetera())->willReturn($list); + $countVisits = $repo->countOrphanVisits(Argument::type(VisitsCountFiltering::class))->willReturn(count($list)); + $listVisits = $repo->findOrphanVisits(Argument::type(VisitsListFiltering::class))->willReturn($list); $getRepo = $this->em->getRepository(Visit::class)->willReturn($repo->reveal()); $paginator = $this->helper->orphanVisits(new VisitsParams()); diff --git a/module/Rest/test-api/Action/OrphanVisitsTest.php b/module/Rest/test-api/Action/OrphanVisitsTest.php index ea890f9f..067cf9a4 100644 --- a/module/Rest/test-api/Action/OrphanVisitsTest.php +++ b/module/Rest/test-api/Action/OrphanVisitsTest.php @@ -12,17 +12,18 @@ class OrphanVisitsTest extends ApiTestCase private const INVALID_SHORT_URL = [ 'referer' => 'https://doma.in/foo', 'date' => '2020-03-01T00:00:00+00:00', - 'userAgent' => 'shlink-tests-agent', + 'userAgent' => 'cf-facebook', 'visitLocation' => null, + 'potentialBot' => true, 'visitedUrl' => 'foo.com', 'type' => 'invalid_short_url', - ]; private const REGULAR_NOT_FOUND = [ 'referer' => 'https://doma.in/foo/bar', 'date' => '2020-02-01T00:00:00+00:00', 'userAgent' => 'shlink-tests-agent', 'visitLocation' => null, + 'potentialBot' => false, 'visitedUrl' => '', 'type' => 'regular_404', ]; @@ -31,6 +32,7 @@ class OrphanVisitsTest extends ApiTestCase 'date' => '2020-01-01T00:00:00+00:00', 'userAgent' => 'shlink-tests-agent', 'visitLocation' => null, + 'potentialBot' => false, 'visitedUrl' => '', 'type' => 'base_url', ]; @@ -39,21 +41,32 @@ class OrphanVisitsTest extends ApiTestCase * @test * @dataProvider provideQueries */ - public function properVisitsAreReturnedBasedInQuery(array $query, int $expectedAmount, array $expectedVisits): void - { + public function properVisitsAreReturnedBasedInQuery( + array $query, + int $totalItems, + int $expectedAmount, + array $expectedVisits + ): void { $resp = $this->callApiWithKey(self::METHOD_GET, '/visits/orphan', [RequestOptions::QUERY => $query]); $payload = $this->getJsonResponsePayload($resp); $visits = $payload['visits']['data'] ?? []; - self::assertEquals(3, $payload['visits']['pagination']['totalItems'] ?? -1); + self::assertEquals($totalItems, $payload['visits']['pagination']['totalItems'] ?? -1); self::assertCount($expectedAmount, $visits); self::assertEquals($expectedVisits, $visits); } public function provideQueries(): iterable { - yield 'all data' => [[], 3, [self::INVALID_SHORT_URL, self::REGULAR_NOT_FOUND, self::BASE_URL]]; - yield 'limit items' => [['itemsPerPage' => 2], 2, [self::INVALID_SHORT_URL, self::REGULAR_NOT_FOUND]]; - yield 'limit items and page' => [['itemsPerPage' => 2, 'page' => 2], 1, [self::BASE_URL]]; + yield 'all data' => [[], 3, 3, [self::INVALID_SHORT_URL, self::REGULAR_NOT_FOUND, self::BASE_URL]]; + yield 'limit items' => [['itemsPerPage' => 2], 3, 2, [self::INVALID_SHORT_URL, self::REGULAR_NOT_FOUND]]; + yield 'limit items and page' => [['itemsPerPage' => 2, 'page' => 2], 3, 1, [self::BASE_URL]]; + yield 'exclude bots' => [['excludeBots' => true], 2, 2, [self::REGULAR_NOT_FOUND, self::BASE_URL]]; + yield 'exclude bots and limit items' => [ + ['excludeBots' => true, 'itemsPerPage' => 1], + 2, + 1, + [self::REGULAR_NOT_FOUND], + ]; } } diff --git a/module/Rest/test-api/Action/ShortUrlVisitsTest.php b/module/Rest/test-api/Action/ShortUrlVisitsTest.php index c578d48d..1d572004 100644 --- a/module/Rest/test-api/Action/ShortUrlVisitsTest.php +++ b/module/Rest/test-api/Action/ShortUrlVisitsTest.php @@ -67,4 +67,30 @@ class ShortUrlVisitsTest extends ApiTestCase yield 'domain' => ['example.com', 0]; yield 'no domain' => [null, 2]; } + + /** + * @test + * @dataProvider provideVisitsForBots + */ + public function properVisitsAreReturnedWhenExcludingBots(bool $excludeBots, int $expectedAmountOfVisits): void + { + $shortCode = 'def456'; + $url = new Uri(sprintf('/short-urls/%s/visits', $shortCode)); + + if ($excludeBots) { + $url = $url->withQuery(Query::build(['excludeBots' => true])); + } + + $resp = $this->callApiWithKey(self::METHOD_GET, (string) $url); + $payload = $this->getJsonResponsePayload($resp); + + self::assertEquals($expectedAmountOfVisits, $payload['visits']['pagination']['totalItems'] ?? -1); + self::assertCount($expectedAmountOfVisits, $payload['visits']['data'] ?? []); + } + + public function provideVisitsForBots(): iterable + { + yield 'bots excluded' => [true, 1]; + yield 'bots not excluded' => [false, 2]; + } } diff --git a/module/Rest/test-api/Action/TagVisitsTest.php b/module/Rest/test-api/Action/TagVisitsTest.php index b30b787f..07b0576d 100644 --- a/module/Rest/test-api/Action/TagVisitsTest.php +++ b/module/Rest/test-api/Action/TagVisitsTest.php @@ -4,6 +4,7 @@ declare(strict_types=1); namespace ShlinkioApiTest\Shlink\Rest\Action; +use GuzzleHttp\RequestOptions; use Shlinkio\Shlink\TestUtils\ApiTest\ApiTestCase; use function sprintf; @@ -14,9 +15,15 @@ class TagVisitsTest extends ApiTestCase * @test * @dataProvider provideTags */ - public function expectedVisitsAreReturned(string $apiKey, string $tag, int $expectedVisitsAmount): void - { - $resp = $this->callApiWithKey(self::METHOD_GET, sprintf('/tags/%s/visits', $tag), [], $apiKey); + public function expectedVisitsAreReturned( + string $apiKey, + string $tag, + bool $excludeBots, + int $expectedVisitsAmount + ): void { + $resp = $this->callApiWithKey(self::METHOD_GET, sprintf('/tags/%s/visits', $tag), [ + RequestOptions::QUERY => $excludeBots ? ['excludeBots' => true] : [], + ], $apiKey); $payload = $this->getJsonResponsePayload($resp); self::assertEquals(self::STATUS_OK, $resp->getStatusCode()); @@ -27,12 +34,16 @@ class TagVisitsTest extends ApiTestCase public function provideTags(): iterable { - yield 'foo with admin API key' => ['valid_api_key', 'foo', 5]; - yield 'bar with admin API key' => ['valid_api_key', 'bar', 2]; - yield 'baz with admin API key' => ['valid_api_key', 'baz', 0]; - yield 'foo with author API key' => ['author_api_key', 'foo', 5]; - yield 'bar with author API key' => ['author_api_key', 'bar', 2]; - yield 'foo with domain API key' => ['domain_api_key', 'foo', 0]; + yield 'foo with admin API key' => ['valid_api_key', 'foo', false, 5]; + yield 'foo with admin API key and no bots' => ['valid_api_key', 'foo', true, 4]; + yield 'bar with admin API key' => ['valid_api_key', 'bar', false, 2]; + yield 'bar with admin API key and no bots' => ['valid_api_key', 'bar', true, 1]; + yield 'baz with admin API key' => ['valid_api_key', 'baz', false, 0]; + yield 'foo with author API key' => ['author_api_key', 'foo', false, 5]; + yield 'foo with author API key and no bots' => ['author_api_key', 'foo', true, 4]; + yield 'bar with author API key' => ['author_api_key', 'bar', false, 2]; + yield 'bar with author API key and no bots' => ['author_api_key', 'bar', true, 1]; + yield 'foo with domain API key' => ['domain_api_key', 'foo', false, 0]; } /** diff --git a/module/Rest/test-api/Fixtures/VisitsFixture.php b/module/Rest/test-api/Fixtures/VisitsFixture.php index 412c79d5..4432df92 100644 --- a/module/Rest/test-api/Fixtures/VisitsFixture.php +++ b/module/Rest/test-api/Fixtures/VisitsFixture.php @@ -36,7 +36,7 @@ class VisitsFixture extends AbstractFixture implements DependentFixtureInterface /** @var ShortUrl $defShortUrl */ $defShortUrl = $this->getReference('def456_short_url'); $manager->persist( - Visit::forValidShortUrl($defShortUrl, new Visitor('shlink-tests-agent', '', '127.0.0.1', '')), + Visit::forValidShortUrl($defShortUrl, new Visitor('cf-facebook', '', '127.0.0.1', '')), ); $manager->persist( Visit::forValidShortUrl($defShortUrl, new Visitor('shlink-tests-agent', 'https://app.shlink.io', '', '')), @@ -58,7 +58,7 @@ class VisitsFixture extends AbstractFixture implements DependentFixtureInterface '2020-02-01', )); $manager->persist($this->setVisitDate( - Visit::forInvalidShortUrl(new Visitor('shlink-tests-agent', 'https://doma.in/foo', '1.2.3.4', 'foo.com')), + Visit::forInvalidShortUrl(new Visitor('cf-facebook', 'https://doma.in/foo', '1.2.3.4', 'foo.com')), '2020-03-01', ));