Add option to customize user agents in robots.txt

This commit is contained in:
Alejandro Celaya
2024-07-05 08:52:41 +02:00
parent 76c42bc17c
commit 4b52c92e97
9 changed files with 96 additions and 44 deletions

View File

@@ -10,6 +10,7 @@ use Psr\Http\Message\ResponseInterface;
use Psr\Http\Message\ServerRequestInterface;
use Psr\Http\Server\RequestHandlerInterface;
use Shlinkio\Shlink\Core\Crawling\CrawlingHelperInterface;
use Shlinkio\Shlink\Core\Options\RobotsOptions;
use function sprintf;
@@ -17,7 +18,7 @@ use const PHP_EOL;
readonly class RobotsAction implements RequestHandlerInterface, StatusCodeInterface
{
public function __construct(private CrawlingHelperInterface $crawlingHelper, private bool $allowAllShortUrls)
public function __construct(private CrawlingHelperInterface $crawlingHelper, private RobotsOptions $robotsOptions)
{
}
@@ -33,11 +34,15 @@ readonly class RobotsAction implements RequestHandlerInterface, StatusCodeInterf
# For more information about the robots.txt standard, see:
# https://www.robotstxt.org/orig.html
User-agent: *
ROBOTS;
if ($this->allowAllShortUrls) {
$userAgents = $this->robotsOptions->hasUserAgents() ? $this->robotsOptions->userAgents : ['*'];
foreach ($userAgents as $userAgent) {
yield sprintf('User-agent: %s%s', $userAgent, PHP_EOL);
}
if ($this->robotsOptions->allowAllShortUrls) {
// Disallow rest URLs, but allow all short codes
yield 'Disallow: /rest/';
return;

View File

@@ -71,6 +71,7 @@ enum EnvVars: string
case REDIRECT_APPEND_EXTRA_PATH = 'REDIRECT_APPEND_EXTRA_PATH';
case MULTI_SEGMENT_SLUGS_ENABLED = 'MULTI_SEGMENT_SLUGS_ENABLED';
case ROBOTS_ALLOW_ALL_SHORT_URLS = 'ROBOTS_ALLOW_ALL_SHORT_URLS';
case ROBOTS_USER_AGENTS = 'ROBOTS_USER_AGENTS';
case TIMEZONE = 'TIMEZONE';
case MEMORY_LIMIT = 'MEMORY_LIMIT';

View File

@@ -0,0 +1,22 @@
<?php
declare(strict_types=1);
namespace Shlinkio\Shlink\Core\Options;
use function count;
/**
 * Immutable value object grouping the robots.txt related settings.
 */
final readonly class RobotsOptions
{
    /**
     * @param bool $allowAllShortUrls Flag exposed to consumers of these options; false unless provided
     * @param string[] $userAgents User agents configured for robots.txt; an empty list unless provided
     */
    public function __construct(
        public bool $allowAllShortUrls = false,
        public array $userAgents = [],
    ) {
    }

    /**
     * Tells whether at least one user agent has been configured.
     */
    public function hasUserAgents(): bool
    {
        // An array is non-empty exactly when it is not identical to the empty array
        return $this->userAgents !== [];
    }
}