import datetime
import gzip
import os
from datetime import timedelta
from pathlib import Path
from unittest import mock
from django.core.files.uploadedfile import SimpleUploadedFile
from django.test import TestCase, override_settings
from django.utils import timezone
from bookmarks.models import BookmarkAsset
from bookmarks.services import assets
from bookmarks.tests.helpers import BookmarkFactoryMixin, disable_logging
class AssetServiceTestCase(TestCase, BookmarkFactoryMixin):
def setUp(self) -> None:
self.setup_temp_assets_dir()
self.get_or_create_test_user()
self.html_content = "
Hello, World!
"
self.pdf_content = b"%PDF-1.4 test pdf content"
self.mock_singlefile_create_snapshot_patcher = mock.patch(
"bookmarks.services.singlefile.create_snapshot",
)
self.mock_singlefile_create_snapshot = (
self.mock_singlefile_create_snapshot_patcher.start()
)
self.mock_singlefile_create_snapshot.side_effect = lambda url, filepath: (
Path(filepath).write_text(self.html_content)
)
# Mock detect_content_type to return text/html by default
self.mock_detect_content_type_patcher = mock.patch(
"bookmarks.services.assets.detect_content_type",
)
self.mock_detect_content_type = self.mock_detect_content_type_patcher.start()
self.mock_detect_content_type.return_value = "text/html"
# Mock is_pdf_content_type to return False by default
self.mock_is_pdf_content_type_patcher = mock.patch(
"bookmarks.services.assets.is_pdf_content_type",
)
self.mock_is_pdf_content_type = self.mock_is_pdf_content_type_patcher.start()
self.mock_is_pdf_content_type.return_value = False
def tearDown(self) -> None:
self.mock_singlefile_create_snapshot_patcher.stop()
self.mock_detect_content_type_patcher.stop()
self.mock_is_pdf_content_type_patcher.stop()
def get_saved_snapshot_file(self):
# look up first file in the asset folder
files = os.listdir(self.assets_dir)
if files:
return files[0]
def create_mock_pdf_response(self, content=None, content_length=None):
if content is None:
content = self.pdf_content
mock_response = mock.Mock()
mock_response.status_code = 200
mock_response.headers = {"Content-Type": "application/pdf"}
if content_length is not None:
mock_response.headers["Content-Length"] = str(content_length)
mock_response.iter_content = mock.Mock(return_value=[content])
mock_response.raise_for_status = mock.Mock()
mock_response.__enter__ = mock.Mock(return_value=mock_response)
mock_response.__exit__ = mock.Mock(return_value=False)
return mock_response
def test_create_snapshot_asset(self):
bookmark = self.setup_bookmark()
asset = assets.create_snapshot_asset(bookmark)
self.assertIsNotNone(asset)
self.assertEqual(asset.bookmark, bookmark)
self.assertEqual(asset.asset_type, BookmarkAsset.TYPE_SNAPSHOT)
self.assertEqual(asset.content_type, "")
self.assertEqual(asset.display_name, "New snapshot")
self.assertEqual(asset.status, BookmarkAsset.STATUS_PENDING)
# asset is not saved to the database
self.assertIsNone(asset.id)
def test_create_snapshot(self):
initial_modified = timezone.datetime(2025, 1, 1, 0, 0, 0, tzinfo=datetime.UTC)
bookmark = self.setup_bookmark(
url="https://example.com", modified=initial_modified
)
asset = assets.create_snapshot_asset(bookmark)
asset.save()
asset.date_created = timezone.datetime(
2023, 8, 11, 21, 45, 11, tzinfo=datetime.UTC
)
assets.create_snapshot(asset)
expected_temp_filename = "snapshot_2023-08-11_214511_https___example.com.tmp"
expected_temp_filepath = os.path.join(self.assets_dir, expected_temp_filename)
expected_filename = "snapshot_2023-08-11_214511_https___example.com.html.gz"
expected_filepath = os.path.join(self.assets_dir, expected_filename)
# should call singlefile.create_snapshot with the correct arguments
self.mock_singlefile_create_snapshot.assert_called_once_with(
"https://example.com",
expected_temp_filepath,
)
# should create gzip file in asset folder
self.assertTrue(os.path.exists(expected_filepath))
# gzip file should contain the correct content
with gzip.open(expected_filepath, "rb") as gz_file:
self.assertEqual(gz_file.read().decode(), self.html_content)
# should remove temporary file
self.assertFalse(os.path.exists(expected_temp_filepath))
# should update asset status and file
asset.refresh_from_db()
self.assertEqual(asset.status, BookmarkAsset.STATUS_COMPLETE)
self.assertEqual(asset.content_type, BookmarkAsset.CONTENT_TYPE_HTML)
self.assertIn("HTML snapshot from", asset.display_name)
self.assertEqual(asset.file, expected_filename)
self.assertTrue(asset.gzip)
# should update bookmark modified date
bookmark.refresh_from_db()
def test_create_snapshot_failure(self):
bookmark = self.setup_bookmark(url="https://example.com")
asset = assets.create_snapshot_asset(bookmark)
asset.save()
self.mock_singlefile_create_snapshot.side_effect = RuntimeError(
"Snapshot failed"
)
with self.assertRaises(RuntimeError):
assets.create_snapshot(asset)
asset.refresh_from_db()
self.assertEqual(asset.status, BookmarkAsset.STATUS_FAILURE)
def test_create_snapshot_truncates_asset_file_name(self):
# Create a bookmark with a very long URL
long_url = "http://" + "a" * 300 + ".com"
bookmark = self.setup_bookmark(url=long_url)
asset = assets.create_snapshot_asset(bookmark)
asset.save()
assets.create_snapshot(asset)
saved_file = self.get_saved_snapshot_file()
self.assertEqual(192, len(saved_file))
self.assertTrue(saved_file.startswith("snapshot_"))
self.assertTrue(saved_file.endswith("aaaa.html.gz"))
def test_create_pdf_snapshot(self):
bookmark = self.setup_bookmark(url="https://example.com/doc.pdf")
asset = assets.create_snapshot_asset(bookmark)
asset.save()
asset.date_created = timezone.datetime(
2023, 8, 11, 21, 45, 11, tzinfo=datetime.UTC
)
self.mock_detect_content_type.return_value = "application/pdf"
self.mock_is_pdf_content_type.return_value = True
with mock.patch("bookmarks.services.assets.requests.get") as mock_get:
mock_get.return_value = self.create_mock_pdf_response()
assets.create_snapshot(asset)
expected_filename = (
"snapshot_2023-08-11_214511_https___example.com_doc.pdf.pdf.gz"
)
expected_filepath = os.path.join(self.assets_dir, expected_filename)
# should create gzip file in asset folder
self.assertTrue(os.path.exists(expected_filepath))
# gzip file should contain the correct content
with gzip.open(expected_filepath, "rb") as gz_file:
self.assertEqual(gz_file.read(), self.pdf_content)
# should update asset status and file
asset.refresh_from_db()
self.assertEqual(asset.status, BookmarkAsset.STATUS_COMPLETE)
self.assertEqual(asset.file, expected_filename)
self.assertEqual(asset.content_type, BookmarkAsset.CONTENT_TYPE_PDF)
self.assertIn("PDF download from", asset.display_name)
self.assertTrue(asset.gzip)
# should update bookmark
bookmark.refresh_from_db()
self.assertEqual(bookmark.latest_snapshot, asset)
def test_create_snapshot_falls_back_to_singlefile_when_detection_fails(self):
bookmark = self.setup_bookmark(url="https://example.com")
asset = assets.create_snapshot_asset(bookmark)
asset.save()
self.mock_detect_content_type.return_value = None # Detection failed
assets.create_snapshot(asset)
asset.refresh_from_db()
self.assertEqual(asset.status, BookmarkAsset.STATUS_COMPLETE)
self.assertEqual(asset.content_type, BookmarkAsset.CONTENT_TYPE_HTML)
self.mock_singlefile_create_snapshot.assert_called()
@override_settings(LD_SNAPSHOT_PDF_MAX_SIZE=100)
def test_create_pdf_snapshot_fails_when_content_length_exceeds_limit(self):
bookmark = self.setup_bookmark(url="https://example.com/doc.pdf")
asset = assets.create_snapshot_asset(bookmark)
asset.save()
self.mock_detect_content_type.return_value = "application/pdf"
self.mock_is_pdf_content_type.return_value = True
with mock.patch("bookmarks.services.assets.requests.get") as mock_get:
mock_get.return_value = self.create_mock_pdf_response(
content_length=1000 # Exceeds 100 byte limit
)
with self.assertRaises(assets.PdfTooLargeError):
assets.create_snapshot(asset)
asset.refresh_from_db()
self.assertEqual(asset.status, BookmarkAsset.STATUS_FAILURE)
@override_settings(LD_SNAPSHOT_PDF_MAX_SIZE=100)
def test_create_pdf_snapshot_fails_when_download_exceeds_limit(self):
bookmark = self.setup_bookmark(url="https://example.com/doc.pdf")
asset = assets.create_snapshot_asset(bookmark)
asset.save()
large_content = b"x" * 150 # Exceeds 100 byte limit
self.mock_detect_content_type.return_value = "application/pdf"
self.mock_is_pdf_content_type.return_value = True
with mock.patch("bookmarks.services.assets.requests.get") as mock_get:
# Response without Content-Length header, will fail during streaming
mock_get.return_value = self.create_mock_pdf_response(content=large_content)
with self.assertRaises(assets.PdfTooLargeError):
assets.create_snapshot(asset)
asset.refresh_from_db()
self.assertEqual(asset.status, BookmarkAsset.STATUS_FAILURE)
def test_create_pdf_snapshot_failure(self):
bookmark = self.setup_bookmark(url="https://example.com/doc.pdf")
asset = assets.create_snapshot_asset(bookmark)
asset.save()
self.mock_detect_content_type.return_value = "application/pdf"
self.mock_is_pdf_content_type.return_value = True
with mock.patch("bookmarks.services.assets.requests.get") as mock_get:
import requests
mock_get.side_effect = requests.RequestException("Download failed")
with self.assertRaises(requests.RequestException):
assets.create_snapshot(asset)
asset.refresh_from_db()
self.assertEqual(asset.status, BookmarkAsset.STATUS_FAILURE)
def test_upload_snapshot(self):
initial_modified = timezone.datetime(2025, 1, 1, 0, 0, 0, tzinfo=datetime.UTC)
bookmark = self.setup_bookmark(
url="https://example.com", modified=initial_modified
)
asset = assets.upload_snapshot(bookmark, self.html_content.encode())
# should create gzip file in asset folder
saved_file_name = self.get_saved_snapshot_file()
self.assertIsNotNone(saved_file_name)
# verify file name
self.assertTrue(saved_file_name.startswith("snapshot_"))
self.assertTrue(saved_file_name.endswith("_https___example.com.html.gz"))
# gzip file should contain the correct content
with gzip.open(os.path.join(self.assets_dir, saved_file_name), "rb") as gz_file:
self.assertEqual(gz_file.read().decode(), self.html_content)
# should create asset
self.assertIsNotNone(asset.id)
self.assertEqual(asset.bookmark, bookmark)
self.assertEqual(asset.asset_type, BookmarkAsset.TYPE_SNAPSHOT)
self.assertEqual(asset.content_type, BookmarkAsset.CONTENT_TYPE_HTML)
self.assertIn("HTML snapshot from", asset.display_name)
self.assertEqual(asset.status, BookmarkAsset.STATUS_COMPLETE)
self.assertEqual(asset.file, saved_file_name)
self.assertTrue(asset.gzip)
# should update bookmark modified date
bookmark.refresh_from_db()
self.assertGreater(bookmark.date_modified, initial_modified)
def test_upload_snapshot_failure(self):
bookmark = self.setup_bookmark(url="https://example.com")
# make gzip.open raise an exception
with mock.patch("gzip.open") as mock_gzip_open:
mock_gzip_open.side_effect = OSError("File operation failed")
with self.assertRaises(OSError):
assets.upload_snapshot(bookmark, b"invalid content")
# asset is not saved to the database
self.assertIsNone(BookmarkAsset.objects.first())
def test_upload_snapshot_truncates_asset_file_name(self):
# Create a bookmark with a very long URL
long_url = "http://" + "a" * 300 + ".com"
bookmark = self.setup_bookmark(url=long_url)
assets.upload_snapshot(bookmark, self.html_content.encode())
saved_file = self.get_saved_snapshot_file()
self.assertEqual(192, len(saved_file))
self.assertTrue(saved_file.startswith("snapshot_"))
self.assertTrue(saved_file.endswith("aaaa.html.gz"))
@disable_logging
def test_upload_asset(self):
initial_modified = timezone.datetime(2025, 1, 1, 0, 0, 0, tzinfo=datetime.UTC)
bookmark = self.setup_bookmark(modified=initial_modified)
file_content = b"test content"
upload_file = SimpleUploadedFile(
"test_file.txt", file_content, content_type="text/plain"
)
asset = assets.upload_asset(bookmark, upload_file)
# should create file in asset folder
saved_file_name = self.get_saved_snapshot_file()
self.assertIsNotNone(upload_file)
# verify file name
self.assertTrue(saved_file_name.startswith("upload_"))
self.assertTrue(saved_file_name.endswith("_test_file.txt.gz"))
# file should contain the correct content
self.assertEqual(self.read_asset_file(asset), file_content)
# should create asset
self.assertIsNotNone(asset.id)
self.assertEqual(asset.bookmark, bookmark)
self.assertEqual(asset.asset_type, BookmarkAsset.TYPE_UPLOAD)
self.assertEqual(asset.content_type, upload_file.content_type)
self.assertEqual(asset.display_name, upload_file.name)
self.assertEqual(asset.status, BookmarkAsset.STATUS_COMPLETE)
self.assertEqual(asset.file, saved_file_name)
self.assertEqual(asset.file_size, self.get_asset_filesize(asset))
self.assertTrue(asset.gzip)
# should update bookmark modified date
bookmark.refresh_from_db()
self.assertGreater(bookmark.date_modified, initial_modified)
@disable_logging
def test_upload_gzip_asset(self):
initial_modified = timezone.datetime(2025, 1, 1, 0, 0, 0, tzinfo=datetime.UTC)
bookmark = self.setup_bookmark(modified=initial_modified)
file_content = gzip.compress(b"test content")
upload_file = SimpleUploadedFile(
"test_file.html.gz", file_content, content_type="application/gzip"
)
asset = assets.upload_asset(bookmark, upload_file)
# should create file in asset folder
saved_file_name = self.get_saved_snapshot_file()
self.assertIsNotNone(upload_file)
# verify file name
self.assertTrue(saved_file_name.startswith("upload_"))
self.assertTrue(saved_file_name.endswith("_test_file.html.gz"))
# file should contain the correct content
self.assertEqual(self.read_asset_file(asset), file_content)
# should create asset
self.assertIsNotNone(asset.id)
self.assertEqual(asset.bookmark, bookmark)
self.assertEqual(asset.asset_type, BookmarkAsset.TYPE_UPLOAD)
self.assertEqual(asset.content_type, "application/gzip")
self.assertEqual(asset.display_name, upload_file.name)
self.assertEqual(asset.status, BookmarkAsset.STATUS_COMPLETE)
self.assertEqual(asset.file, saved_file_name)
self.assertEqual(asset.file_size, len(file_content))
self.assertFalse(asset.gzip)
# should update bookmark modified date
bookmark.refresh_from_db()
self.assertGreater(bookmark.date_modified, initial_modified)
@disable_logging
def test_upload_asset_truncates_asset_file_name(self):
# Create a bookmark with a very long URL
long_file_name = "a" * 300 + ".txt"
bookmark = self.setup_bookmark()
file_content = b"test content"
upload_file = SimpleUploadedFile(
long_file_name, file_content, content_type="text/plain"
)
assets.upload_asset(bookmark, upload_file)
saved_file = self.get_saved_snapshot_file()
self.assertEqual(192, len(saved_file))
self.assertTrue(saved_file.startswith("upload_"))
self.assertTrue(saved_file.endswith("aaaa.txt.gz"))
@disable_logging
def test_upload_asset_failure(self):
bookmark = self.setup_bookmark()
upload_file = SimpleUploadedFile("test_file.txt", b"test content")
# make open raise an exception
with mock.patch("builtins.open") as mock_open:
mock_open.side_effect = OSError("File operation failed")
with self.assertRaises(OSError):
assets.upload_asset(bookmark, upload_file)
# asset is not saved to the database
self.assertIsNone(BookmarkAsset.objects.first())
def test_create_snapshot_updates_bookmark_latest_snapshot(self):
bookmark = self.setup_bookmark(url="https://example.com")
first_asset = assets.create_snapshot_asset(bookmark)
first_asset.save()
assets.create_snapshot(first_asset)
bookmark.refresh_from_db()
self.assertEqual(bookmark.latest_snapshot, first_asset)
second_asset = assets.create_snapshot_asset(bookmark)
second_asset.save()
assets.create_snapshot(second_asset)
bookmark.refresh_from_db()
self.assertEqual(bookmark.latest_snapshot, second_asset)
def test_upload_snapshot_updates_bookmark_latest_snapshot(self):
bookmark = self.setup_bookmark(url="https://example.com")
first_asset = assets.upload_snapshot(bookmark, self.html_content.encode())
bookmark.refresh_from_db()
self.assertEqual(bookmark.latest_snapshot, first_asset)
second_asset = assets.upload_snapshot(bookmark, self.html_content.encode())
bookmark.refresh_from_db()
self.assertEqual(bookmark.latest_snapshot, second_asset)
self.assertNotEqual(bookmark.latest_snapshot, first_asset)
def test_create_snapshot_failure_does_not_update_latest_snapshot(self):
# Create a bookmark with an existing latest_snapshot
bookmark = self.setup_bookmark(url="https://example.com")
initial_snapshot = assets.upload_snapshot(bookmark, self.html_content.encode())
bookmark.refresh_from_db()
self.assertEqual(bookmark.latest_snapshot, initial_snapshot)
# Create a new snapshot asset that will fail
failing_asset = assets.create_snapshot_asset(bookmark)
failing_asset.save()
# Make the snapshot creation fail
self.mock_singlefile_create_snapshot.side_effect = RuntimeError(
"Snapshot creation failed"
)
# Attempt to create a snapshot (which will fail)
with self.assertRaises(RuntimeError):
assets.create_snapshot(failing_asset)
# Verify that the bookmark's latest_snapshot is still the initial snapshot
bookmark.refresh_from_db()
self.assertEqual(bookmark.latest_snapshot, initial_snapshot)
def test_upload_snapshot_failure_does_not_update_latest_snapshot(self):
# Create a bookmark with an existing latest_snapshot
bookmark = self.setup_bookmark(url="https://example.com")
initial_snapshot = assets.upload_snapshot(bookmark, self.html_content.encode())
bookmark.refresh_from_db()
self.assertEqual(bookmark.latest_snapshot, initial_snapshot)
# Make the gzip.open function fail
with mock.patch("gzip.open") as mock_gzip_open:
mock_gzip_open.side_effect = OSError("Upload failed")
# Attempt to upload a snapshot (which will fail)
with self.assertRaises(OSError):
assets.upload_snapshot(bookmark, b"New content")
# Verify that the bookmark's latest_snapshot is still the initial snapshot
bookmark.refresh_from_db()
self.assertEqual(bookmark.latest_snapshot, initial_snapshot)
def test_remove_latest_snapshot_updates_bookmark(self):
# Create a bookmark with multiple snapshots
bookmark = self.setup_bookmark()
# Create base time (1 hour ago)
base_time = timezone.now() - timedelta(hours=1)
# Create three snapshots with explicitly different dates
old_asset = self.setup_asset(
bookmark=bookmark,
asset_type=BookmarkAsset.TYPE_SNAPSHOT,
status=BookmarkAsset.STATUS_COMPLETE,
file="old_snapshot.html.gz",
date_created=base_time,
)
self.setup_asset_file(old_asset)
middle_asset = self.setup_asset(
bookmark=bookmark,
asset_type=BookmarkAsset.TYPE_SNAPSHOT,
status=BookmarkAsset.STATUS_COMPLETE,
file="middle_snapshot.html.gz",
date_created=base_time + timedelta(minutes=30),
)
self.setup_asset_file(middle_asset)
latest_asset = self.setup_asset(
bookmark=bookmark,
asset_type=BookmarkAsset.TYPE_SNAPSHOT,
status=BookmarkAsset.STATUS_COMPLETE,
file="latest_snapshot.html.gz",
date_created=base_time + timedelta(minutes=60),
)
self.setup_asset_file(latest_asset)
# Set the latest asset as the bookmark's latest_snapshot
bookmark.latest_snapshot = latest_asset
bookmark.save()
# Delete the latest snapshot
assets.remove_asset(latest_asset)
bookmark.refresh_from_db()
# Verify that middle_asset is now the latest_snapshot
self.assertEqual(bookmark.latest_snapshot, middle_asset)
# Delete the middle snapshot
assets.remove_asset(middle_asset)
bookmark.refresh_from_db()
# Verify that old_asset is now the latest_snapshot
self.assertEqual(bookmark.latest_snapshot, old_asset)
# Delete the last snapshot
assets.remove_asset(old_asset)
bookmark.refresh_from_db()
# Verify that latest_snapshot is now None
self.assertIsNone(bookmark.latest_snapshot)
def test_remove_non_latest_snapshot_does_not_affect_bookmark(self):
# Create a bookmark with multiple snapshots
bookmark = self.setup_bookmark()
# Create base time (1 hour ago)
base_time = timezone.now() - timedelta(hours=1)
# Create two snapshots with explicitly different dates
old_asset = self.setup_asset(
bookmark=bookmark,
asset_type=BookmarkAsset.TYPE_SNAPSHOT,
status=BookmarkAsset.STATUS_COMPLETE,
file="old_snapshot.html.gz",
date_created=base_time,
)
self.setup_asset_file(old_asset)
latest_asset = self.setup_asset(
bookmark=bookmark,
asset_type=BookmarkAsset.TYPE_SNAPSHOT,
status=BookmarkAsset.STATUS_COMPLETE,
file="latest_snapshot.html.gz",
date_created=base_time + timedelta(minutes=30),
)
self.setup_asset_file(latest_asset)
# Set the latest asset as the bookmark's latest_snapshot
bookmark.latest_snapshot = latest_asset
bookmark.save()
# Delete the old snapshot (not the latest)
assets.remove_asset(old_asset)
bookmark.refresh_from_db()
# Verify that latest_snapshot hasn't changed
self.assertEqual(bookmark.latest_snapshot, latest_asset)
@disable_logging
def test_remove_asset(self):
initial_modified = timezone.datetime(2025, 1, 1, 0, 0, 0, tzinfo=datetime.UTC)
bookmark = self.setup_bookmark(modified=initial_modified)
file_content = b"test content for removal"
upload_file = SimpleUploadedFile(
"test_remove_file.txt", file_content, content_type="text/plain"
)
asset = assets.upload_asset(bookmark, upload_file)
asset_filepath = os.path.join(self.assets_dir, asset.file)
# Verify asset and file exist
self.assertTrue(BookmarkAsset.objects.filter(id=asset.id).exists())
self.assertTrue(os.path.exists(asset_filepath))
bookmark.date_modified = initial_modified
bookmark.save()
# Remove the asset
assets.remove_asset(asset)
# Verify asset is removed from DB
self.assertFalse(BookmarkAsset.objects.filter(id=asset.id).exists())
# Verify file is removed from disk
self.assertFalse(os.path.exists(asset_filepath))
# Verify bookmark modified date is updated
bookmark.refresh_from_db()
self.assertGreater(bookmark.date_modified, initial_modified)