mirror of
https://github.com/shlinkio/shlink.git
synced 2026-03-06 23:33:13 +08:00
Add option to allow all URLs to be crawlable via robots.txt
This commit is contained in:
@@ -189,7 +189,7 @@ return [
|
||||
'Logger_Shlink',
|
||||
Options\QrCodeOptions::class,
|
||||
],
|
||||
Action\RobotsAction::class => [Crawling\CrawlingHelper::class],
|
||||
Action\RobotsAction::class => [Crawling\CrawlingHelper::class, 'config.robots.allow-all-short-urls'],
|
||||
|
||||
ShortUrl\Resolver\PersistenceShortUrlRelationResolver::class => [
|
||||
'em',
|
||||
|
||||
@@ -15,9 +15,9 @@ use function sprintf;
|
||||
|
||||
use const PHP_EOL;
|
||||
|
||||
class RobotsAction implements RequestHandlerInterface, StatusCodeInterface
|
||||
readonly class RobotsAction implements RequestHandlerInterface, StatusCodeInterface
|
||||
{
|
||||
public function __construct(private readonly CrawlingHelperInterface $crawlingHelper)
|
||||
/**
 * @param CrawlingHelperInterface $crawlingHelper Provides the crawlable short codes, each of which is
 *                                                emitted as an "Allow: /<shortCode>" line.
 * @param bool $allowAllShortUrls When true, only "Disallow: /rest/" is emitted and the crawling helper
 *                                is not queried for individual short codes.
 */
public function __construct(private CrawlingHelperInterface $crawlingHelper, private bool $allowAllShortUrls)
|
||||
{
|
||||
}
|
||||
|
||||
@@ -37,6 +37,12 @@ class RobotsAction implements RequestHandlerInterface, StatusCodeInterface
|
||||
|
||||
ROBOTS;
|
||||
|
||||
if ($this->allowAllShortUrls) {
|
||||
// Disallow everything under /rest/, but leave all short codes crawlable
|
||||
yield 'Disallow: /rest/';
|
||||
return;
|
||||
}
|
||||
|
||||
$shortCodes = $this->crawlingHelper->listCrawlableShortCodes();
|
||||
foreach ($shortCodes as $shortCode) {
|
||||
yield sprintf('Allow: /%s%s', $shortCode, PHP_EOL);
|
||||
|
||||
@@ -69,8 +69,9 @@ enum EnvVars: string
|
||||
case DEFAULT_DOMAIN = 'DEFAULT_DOMAIN';
|
||||
case AUTO_RESOLVE_TITLES = 'AUTO_RESOLVE_TITLES';
|
||||
case REDIRECT_APPEND_EXTRA_PATH = 'REDIRECT_APPEND_EXTRA_PATH';
|
||||
case TIMEZONE = 'TIMEZONE';
|
||||
case MULTI_SEGMENT_SLUGS_ENABLED = 'MULTI_SEGMENT_SLUGS_ENABLED';
|
||||
case ROBOTS_ALLOW_ALL_SHORT_URLS = 'ROBOTS_ALLOW_ALL_SHORT_URLS';
|
||||
case TIMEZONE = 'TIMEZONE';
|
||||
case MEMORY_LIMIT = 'MEMORY_LIMIT';
|
||||
|
||||
public function loadFromEnv(mixed $default = null): mixed
|
||||
|
||||
@@ -14,24 +14,25 @@ use Shlinkio\Shlink\Core\Crawling\CrawlingHelperInterface;
|
||||
|
||||
class RobotsActionTest extends TestCase
|
||||
{
|
||||
private RobotsAction $action;
|
||||
private MockObject & CrawlingHelperInterface $helper;
|
||||
|
||||
protected function setUp(): void
|
||||
{
|
||||
$this->helper = $this->createMock(CrawlingHelperInterface::class);
|
||||
$this->action = new RobotsAction($this->helper);
|
||||
}
|
||||
|
||||
#[Test, DataProvider('provideShortCodes')]
|
||||
public function buildsRobotsLinesFromCrawlableShortCodes(array $shortCodes, string $expected): void
|
||||
{
|
||||
public function buildsRobotsLinesFromCrawlableShortCodes(
|
||||
array $shortCodes,
|
||||
bool $allowAllShortUrls,
|
||||
string $expected,
|
||||
): void {
|
||||
$this->helper
|
||||
->expects($this->once())
|
||||
->expects($allowAllShortUrls ? $this->never() : $this->once())
|
||||
->method('listCrawlableShortCodes')
|
||||
->willReturn($shortCodes);
|
||||
|
||||
$response = $this->action->handle(ServerRequestFactory::fromGlobals());
|
||||
$response = $this->action($allowAllShortUrls)->handle(ServerRequestFactory::fromGlobals());
|
||||
|
||||
self::assertEquals(200, $response->getStatusCode());
|
||||
self::assertEquals($expected, $response->getBody()->__toString());
|
||||
@@ -40,7 +41,7 @@ class RobotsActionTest extends TestCase
|
||||
|
||||
public static function provideShortCodes(): iterable
|
||||
{
|
||||
yield 'three short codes' => [['foo', 'bar', 'baz'], <<<ROBOTS
|
||||
yield 'three short codes' => [['foo', 'bar', 'baz'], false, <<<ROBOTS
|
||||
# For more information about the robots.txt standard, see:
|
||||
# https://www.robotstxt.org/orig.html
|
||||
|
||||
@@ -50,7 +51,7 @@ class RobotsActionTest extends TestCase
|
||||
Allow: /baz
|
||||
Disallow: /
|
||||
ROBOTS];
|
||||
yield 'five short codes' => [['foo', 'bar', 'some', 'thing', 'baz'], <<<ROBOTS
|
||||
yield 'five short codes' => [['foo', 'bar', 'some', 'thing', 'baz'], false, <<<ROBOTS
|
||||
# For more information about the robots.txt standard, see:
|
||||
# https://www.robotstxt.org/orig.html
|
||||
|
||||
@@ -62,12 +63,31 @@ class RobotsActionTest extends TestCase
|
||||
Allow: /baz
|
||||
Disallow: /
|
||||
ROBOTS];
|
||||
yield 'no short codes' => [[], <<<ROBOTS
|
||||
yield 'no short codes' => [[], false, <<<ROBOTS
|
||||
# For more information about the robots.txt standard, see:
|
||||
# https://www.robotstxt.org/orig.html
|
||||
|
||||
User-agent: *
|
||||
Disallow: /
|
||||
ROBOTS];
|
||||
yield 'three short codes and allow all short urls' => [['foo', 'bar', 'some'], true, <<<ROBOTS
|
||||
# For more information about the robots.txt standard, see:
|
||||
# https://www.robotstxt.org/orig.html
|
||||
|
||||
User-agent: *
|
||||
Disallow: /rest/
|
||||
ROBOTS];
|
||||
yield 'no short codes and allow all short urls' => [[], true, <<<ROBOTS
|
||||
# For more information about the robots.txt standard, see:
|
||||
# https://www.robotstxt.org/orig.html
|
||||
|
||||
User-agent: *
|
||||
Disallow: /rest/
|
||||
ROBOTS];
|
||||
}
|
||||
|
||||
/**
 * Creates the RobotsAction under test, wired to the mocked crawling helper.
 *
 * Built on demand because the flag is a constructor argument that changes between data sets.
 *
 * @param bool $allowAllShortUrls Value forwarded to the action's constructor argument of the same name.
 */
private function action(bool $allowAllShortUrls = false): RobotsAction
|
||||
{
|
||||
return new RobotsAction($this->helper, allowAllShortUrls: $allowAllShortUrls);
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user