Add option to allow all URLs to be crawlable via robots.txt

author Alejandro Celaya
date   2024-04-21 17:09:20 +02:00
parent a89b53af4f
commit 163244f40f

8 changed files with 72 additions and 14 deletions


@@ -15,9 +15,9 @@ use function sprintf;
 
 use const PHP_EOL;
 
-class RobotsAction implements RequestHandlerInterface, StatusCodeInterface
+readonly class RobotsAction implements RequestHandlerInterface, StatusCodeInterface
 {
-    public function __construct(private readonly CrawlingHelperInterface $crawlingHelper)
+    public function __construct(private CrawlingHelperInterface $crawlingHelper, private bool $allowAllShortUrls)
     {
     }
 
@@ -37,6 +37,12 @@ class RobotsAction implements RequestHandlerInterface, StatusCodeInterface
         ROBOTS;
 
+        if ($this->allowAllShortUrls) {
+            // Disallow rest URLs, but allow all short codes
+            yield 'Disallow: /rest/';
+            return;
+        }
+
         $shortCodes = $this->crawlingHelper->listCrawlableShortCodes();
         foreach ($shortCodes as $shortCode) {
             yield sprintf('Allow: /%s%s', $shortCode, PHP_EOL);
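For illustration only (not part of the commit): with the new flag enabled, the generator short-circuits right after the static ROBOTS heredoc and emits a single Disallow rule for the REST API, instead of one Allow line per crawlable short code. Assuming the heredoc ends with a standard "User-agent: *" line (its content is not shown in this hunk) and using hypothetical short codes abc123 and def456, the two outputs would look roughly like this:

    # With ROBOTS_ALLOW_ALL_SHORT_URLS enabled
    User-agent: *
    Disallow: /rest/

    # Previous behaviour (flag disabled)
    User-agent: *
    Allow: /abc123
    Allow: /def456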


@@ -69,8 +69,9 @@ enum EnvVars: string
     case DEFAULT_DOMAIN = 'DEFAULT_DOMAIN';
     case AUTO_RESOLVE_TITLES = 'AUTO_RESOLVE_TITLES';
     case REDIRECT_APPEND_EXTRA_PATH = 'REDIRECT_APPEND_EXTRA_PATH';
-    case TIMEZONE = 'TIMEZONE';
     case MULTI_SEGMENT_SLUGS_ENABLED = 'MULTI_SEGMENT_SLUGS_ENABLED';
+    case ROBOTS_ALLOW_ALL_SHORT_URLS = 'ROBOTS_ALLOW_ALL_SHORT_URLS';
+    case TIMEZONE = 'TIMEZONE';
     case MEMORY_LIMIT = 'MEMORY_LIMIT';
 
     public function loadFromEnv(mixed $default = null): mixed
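For reference (not shown in this diff): like the other entries in the EnvVars enum, the new option would presumably be toggled through an environment variable when running Shlink, for example:

    # Hypothetical invocation; only the ROBOTS_ALLOW_ALL_SHORT_URLS name comes from this commit
    docker run -p 8080:8080 \
      -e DEFAULT_DOMAIN=s.example.com \
      -e ROBOTS_ALLOW_ALL_SHORT_URLS=true \
      shlinkio/shlink

The domain, port mapping and image invocation above are illustrative only; they are not defined by this change.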