Turn scheme-less URLs into HTTPS instead of HTTP links (#1225)

* Turn scheme-less URLs into HTTPS instead of HTTP links

Signed-off-by: Max Kunzelmann <maxdev@posteo.de>

* fix bug, add tests

* use single linker instance

* simplify logic

* lint

---------

Signed-off-by: Max Kunzelmann <maxdev@posteo.de>
Co-authored-by: Sascha Ißbrücker <sascha.issbruecker@gmail.com>
This commit is contained in:
Max
2026-01-05 16:45:33 +01:00
committed by GitHub
parent afbf85b249
commit cbc8618805
2 changed files with 54 additions and 1 deletions

View File

@@ -2,6 +2,7 @@ import re
import bleach
import markdown
from bleach.linkifier import DEFAULT_CALLBACKS, Linker
from bleach_allowlist import markdown_attrs, markdown_tags
from django import template
from django.utils.safestring import mark_safe
@@ -78,6 +79,22 @@ class HtmlMinNode(template.Node):
return output
def schemeless_urls_to_https(attrs, _new):
    """Linkify callback that makes scheme-less URLs link to HTTPS.

    When the linkifier turns bare text such as ``example.com`` into a link
    it gives the href an ``http://`` scheme. This callback upgrades that
    href to ``https://`` while leaving alone every URL whose scheme was
    chosen by the author.

    Args:
        attrs: Attribute dict of the link. The href is stored under the
            ``(None, "href")`` key and the link text under ``"_text"``.
        _new: True when the link was created by the linkifier from plain
            text, False when the ``<a>`` tag already existed in the input.

    Returns:
        The same ``attrs`` dict, with the href rewritten when applicable.
    """
    href_key = (None, "href")

    # Anchors that already existed in the HTML (e.g. a markdown link such as
    # ``[label](http://host)``) carry an href the author wrote explicitly.
    # Their "_text" is the label, not the URL, so it cannot be used to tell
    # whether http:// was intentional -- never touch those links.
    if not _new or href_key not in attrs:
        return attrs

    if attrs.get("_text", "").startswith("http://"):
        # The original text explicitly specifies http://, so keep it
        return attrs

    href = attrs[href_key]
    if href.startswith("http://"):
        # Only the lowercase http:// prefix is upgraded; other schemes
        # (https, ftp, ...) pass through unchanged.
        attrs[href_key] = "https://" + href[len("http://"):]
    return attrs
# Module-level linkifier shared by all markdown renders: bleach's default
# callbacks followed by the HTTPS upgrade for scheme-less URLs.
linker = Linker(
    callbacks=list(DEFAULT_CALLBACKS) + [schemeless_urls_to_https],
)
@register.simple_tag(name="markdown", takes_context=True)
def render_markdown(context, markdown_text):
# naive approach to reusing the renderer for a single request
@@ -90,7 +107,7 @@ def render_markdown(context, markdown_text):
as_html = renderer.convert(markdown_text)
sanitized_html = bleach.clean(as_html, markdown_tags, markdown_attrs)
linkified_html = bleach.linkify(sanitized_html)
linkified_html = linker.linkify(sanitized_html)
return mark_safe(linkified_html)

View File

@@ -919,6 +919,42 @@ class BookmarkListTemplateTest(TestCase, BookmarkFactoryMixin, HtmlTestMixin):
note_html = '<p><a href="https://example.com" rel="nofollow">https://example.com</a></p>'
self.assertNotes(html, note_html, 1)
def test_note_linkify_converts_schemeless_urls_to_https(self):
    # Each case is (bookmark notes, expected rendered notes HTML). The cases
    # run in order, adding one bookmark per case, and every expected snippet
    # must appear exactly once in the rendered list.
    cases = [
        # Scheme-less URL should become HTTPS
        (
            "Example: example.com",
            '<p>Example: <a href="https://example.com" rel="nofollow">example.com</a></p>',
        ),
        # Explicit http:// should stay as http://
        (
            "Example: http://example.com",
            '<p>Example: <a href="http://example.com" rel="nofollow">http://example.com</a></p>',
        ),
        # Explicit https:// should stay as https://
        (
            "Example: https://example.com",
            '<p>Example: <a href="https://example.com" rel="nofollow">https://example.com</a></p>',
        ),
        # Email addresses should not be affected
        (
            "Contact: hello@example.com",
            "<p>Contact: hello@example.com</p>",
        ),
        # ftp:// should not be converted to https
        (
            "FTP: ftp://ftp.example.com",
            '<p>FTP: <a href="ftp://ftp.example.com" rel="nofollow">ftp://ftp.example.com</a></p>',
        ),
    ]

    for notes, expected_html in cases:
        self.setup_bookmark(notes=notes)
        rendered = self.render_template()
        self.assertNotes(rendered, expected_html, 1)
def test_note_cleans_html(self):
self.setup_bookmark(notes='<script>alert("test")</script>')
self.setup_bookmark(