Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 36 additions & 23 deletions server/mergin/sync/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -743,22 +743,21 @@ def diffs_chain(
return None, []

diffs = []
cached_items = Checkpoint.get_checkpoints(
basefile.project_version_name, version
)
checkpoints = Checkpoint.get_checkpoints(basefile.project_version_name, version)
expected_diffs = (
FileDiff.query.filter_by(
basefile_id=basefile.id,
)
.filter(
tuple_(FileDiff.rank, FileDiff.version).in_(
[(item.rank, item.end) for item in cached_items]
[(item.rank, item.end) for item in checkpoints]
)
)
.all()
)

for item in cached_items:
for item in checkpoints:
diff_needs_to_be_created = False
diff = next(
(
d
Expand All @@ -767,25 +766,38 @@ def diffs_chain(
),
None,
)
if diff and os.path.exists(diff.abs_path):
diffs.append(diff)
elif item.rank > 0:
# fallback if checkpoint does not exist: replace merged diff with individual diffs
individual_diffs = (
FileDiff.query.filter_by(
basefile_id=basefile.id,
rank=0,
if diff:
if os.path.exists(diff.abs_path):
diffs.append(diff)
else:
diff_needs_to_be_created = True
else:
# we do not have record in DB, create a checkpoint if it makes sense
if item.rank > 0 and FileDiff.can_create_checkpoint(file_id, item):
diff = FileDiff(
basefile=basefile,
version=item.end,
rank=item.rank,
path=basefile.file.generate_diff_name(),
size=None,
checksum=None,
)
.filter(
FileDiff.version >= item.start, FileDiff.version <= item.end
db.session.add(diff)
db.session.commit()
diff_needs_to_be_created = True
else:
# we asked for checkpoint where there was no change
continue

if diff_needs_to_be_created:
diff_created = diff.construct_checkpoint()
if diff_created:
diffs.append(diff)
else:
logging.error(
f"Failed to create a diff for file {basefile.file.path} at version {basefile.project_version_name} of rank {item.rank}."
)
.order_by(FileDiff.version)
.all()
)
diffs.extend(individual_diffs)
else:
# we asked for individual diff but there is no such diff as there was not change at that version
continue
return None, []

return basefile, diffs

Expand Down Expand Up @@ -924,9 +936,10 @@ def construct_checkpoint(self) -> bool:
return True

if self.rank == 0:
raise ValueError(
logging.error(
"Checkpoint of rank 0 should be created by user upload, cannot be constructed"
)
return False

# merged diffs can only be created for certain versions
if self.version % LOG_BASE:
Expand Down
58 changes: 41 additions & 17 deletions server/mergin/sync/public_api_controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import os
import logging
from dataclasses import asdict
from enum import Enum
from typing import Dict
from datetime import datetime

Expand Down Expand Up @@ -80,6 +81,7 @@
generate_location,
is_valid_uuid,
get_device_id,
is_versioned_file,
prepare_download_response,
get_device_id,
wkb2wkt,
Expand Down Expand Up @@ -287,6 +289,12 @@ def delete_project(namespace, project_name): # noqa: E501
return NoContent, 200


class DowloadFileAction(Enum):
FULL = "full"
FULL_GPKG = "full_gpkg"
DIFF = "diff"


def download_project_file(
project_name, namespace, file, version=None, diff=None
): # noqa: E501
Expand All @@ -307,10 +315,20 @@ def download_project_file(

:rtype: file
"""
project = require_project(namespace, project_name, ProjectPermissions.Read)
if diff and not version:
if not is_versioned_file(file):
action = DowloadFileAction.FULL
elif diff:
action = DowloadFileAction.DIFF
else:
action = DowloadFileAction.FULL_GPKG

if action is DowloadFileAction.DIFF and not version:
abort(400, f"Changeset must be requested for particular file version")

if action is DowloadFileAction.FULL and diff is True:
abort(404, f"No diff in particular file {file})")

project = require_project(namespace, project_name, ProjectPermissions.Read)
lookup_version = (
ProjectVersion.from_v_name(version) if version else project.latest_version
)
Expand All @@ -329,24 +347,30 @@ def download_project_file(
if not fh or fh.change == PushChangeType.DELETE.value:
abort(404, f"File {file} not found")

if diff and version:
# get specific version of geodiff file modified in requested version
if not fh.diff:
abort(404, f"No diff in particular file {file} version")
file_path = fh.diff_file.location
else:
file_path = fh.location

if version and not diff:
project.storage.restore_versioned_file(
file, ProjectVersion.from_v_name(version)
)
# user asked for diff, but there is no diff at that version
if action is DowloadFileAction.DIFF and not fh.diff:
abort(404, f"No diff in particular file {file} version")

file_path = (
fh.diff_file.location if action is DowloadFileAction.DIFF else fh.location
)
abs_path = os.path.join(project.storage.project_dir, file_path)
# check file exists (e.g. there might have been issue with restore)

if not os.path.exists(abs_path):
logging.error(f"Missing file {namespace}/{project_name}/{file_path}")
abort(404)
if action is DowloadFileAction.FULL_GPKG:
project.storage.restore_versioned_file(
file, ProjectVersion.from_v_name(version)
)

# check again after restore
if not os.path.exists(abs_path):
logging.error(
f"Failed to restore {namespace}/{project_name}/{file_path}"
)
abort(404)
else:
logging.error(f"Missing file {namespace}/{project_name}/{file_path}")
abort(404)

response = prepare_download_response(project.storage.project_dir, file_path)
return response
Expand Down
62 changes: 61 additions & 1 deletion server/mergin/tests/test_file_restore.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,13 @@

from ..app import db
from ..auth.models import User
from ..sync.models import ProjectVersion, Project, GeodiffActionHistory
from ..sync.models import (
FileDiff,
ProjectFilePath,
ProjectVersion,
Project,
GeodiffActionHistory,
)
from . import test_project_dir, TMP_DIR
from .utils import (
create_project,
Expand Down Expand Up @@ -163,6 +169,19 @@ def test_version_file_restore(diff_project):
diff_project.storage.restore_versioned_file("base.gpkg", 7)
assert os.path.exists(test_file)
assert gpkgs_are_equal(test_file, test_file + "_backup")
# no merged diffs needed
file_path_id = (
ProjectFilePath.query.filter_by(project_id=diff_project.id, path="base.gpkg")
.first()
.id
)
assert (
FileDiff.query.filter_by(file_path_id=file_path_id)
.filter(FileDiff.rank > 0)
.count()
== 0
)

# check we track performance of reconstruction
gh = GeodiffActionHistory.query.filter_by(
project_id=diff_project.id, target_version="v7"
Expand Down Expand Up @@ -198,3 +217,44 @@ def test_version_file_restore(diff_project):
diff_project.storage.restore_versioned_file("test.txt", 1)
assert not os.path.exists(test_file)
assert not os.path.exists(diff_project.storage.geodiff_working_dir)

# let's add some dummy changes to test.gpkg so we can restore full gpkg using checkpoints created on demand
file_path_id = (
ProjectFilePath.query.filter_by(project_id=diff_project.id, path="test.gpkg")
.first()
.id
)
base_gpkg = os.path.join(diff_project.storage.project_dir, "test.gpkg")
shutil.copy(
os.path.join(diff_project.storage.project_dir, "v9", "test.gpkg"), base_gpkg
)
for i in range(23):
sql = f"UPDATE simple SET rating={i}"
execute_query(base_gpkg, sql)
pv = push_change(
diff_project, "updated", "test.gpkg", diff_project.storage.project_dir
)
assert diff_project.latest_version == pv.name == (11 + i)
file_diff = FileDiff.query.filter_by(
file_path_id=file_path_id, version=pv.name, rank=0
).first()
assert file_diff and os.path.exists(file_diff.abs_path)

assert (
FileDiff.query.filter_by(file_path_id=file_path_id)
.filter(FileDiff.rank > 0)
.count()
== 0
)

test_file = os.path.join(diff_project.storage.project_dir, "v30", "test.gpkg")
os.rename(test_file, test_file + "_backup")
diff_project.storage.restore_versioned_file("test.gpkg", 30)
assert os.path.exists(test_file)
assert gpkgs_are_equal(test_file, test_file + "_backup")
assert (
FileDiff.query.filter_by(file_path_id=file_path_id)
.filter(FileDiff.rank > 0)
.count()
> 0
)
18 changes: 16 additions & 2 deletions server/mergin/tests/test_project_controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -880,6 +880,14 @@ def test_download_diff_file(client, diff_project):

# download full version after file was removed
os.remove(os.path.join(diff_project.storage.project_dir, file_change.location))
file_path_id = (
ProjectFilePath.query.filter_by(project_id=diff_project.id, path=test_file)
.first()
.id
)
basefile, diffs = FileHistory.diffs_chain(file_path_id, 4)
# we construct full gpkg from basefile and single diff v4
assert basefile is not None and len(diffs) == 1
resp = client.get(
"/v1/project/raw/{}/{}?file={}&version=v4".format(
test_workspace_name, test_project, test_file
Expand Down Expand Up @@ -1902,11 +1910,17 @@ def test_file_diffs_chain(diff_project):
assert len(diffs) == 1
assert diffs[0].version == 6

# diff was used in v7, nothing happened in v8 (=v7)
basefile, diffs = FileHistory.diffs_chain(file_id, 8)
# diff was used in v7
basefile, diffs = FileHistory.diffs_chain(file_id, 7)
assert basefile.version.name == 5
assert len(diffs) == 2

# nothing happened in v8 (=v7) but we have now merged diff in chain v5-v8
basefile, diffs = FileHistory.diffs_chain(file_id, 8)
assert basefile.version.name == 5
assert len(diffs) == 1
assert diffs[0].rank == 1 and diffs[0].version == 8

# file was removed in v9
basefile, diffs = FileHistory.diffs_chain(file_id, 9)
assert not basefile
Expand Down
45 changes: 16 additions & 29 deletions server/mergin/tests/test_public_api_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -304,45 +304,31 @@ def test_create_diff_checkpoint(diff_project):

basefile, diffs = FileHistory.diffs_chain(file_path_id, 32)
assert basefile.project_version_name == 9
# so far we only have individual diffs
assert len(diffs) == 22
assert len(diffs) == 3
# also a lower diff rank was created
lower_diff = FileDiff.query.filter_by(version=24, rank=1).first()
assert os.path.exists(lower_diff.abs_path)

# diff for v17-v20 from individual diffs
assert FileDiff.can_create_checkpoint(file_path_id, Checkpoint(1, 5)) is True
diff = FileDiff(
basefile=basefile, path=f"test.gpkg-diff-{uuid.uuid4()}", version=20, rank=1
)
db.session.add(diff)
db.session.commit()
assert not os.path.exists(diff.abs_path)
diff = FileDiff.query.filter_by(
file_path_id=file_path_id, version=20, rank=1
).first()
os.remove(diff.abs_path)
diff.construct_checkpoint()
assert os.path.exists(diff.abs_path)

basefile, diffs = FileHistory.diffs_chain(file_path_id, 20)
assert basefile.project_version_name == 9
# 6 individual diffs (v11-v16) + merged diff (v17-v20) as the last one
assert len(diffs) == 7
# individual diff v12 + (v13-v16) + (v17-v20) as the last one
assert len(diffs) == 3
assert diffs[-1] == diff

# repeat - nothing to do
mtime = os.path.getmtime(diff.abs_path)
diff.construct_checkpoint()
assert mtime == os.path.getmtime(diff.abs_path)

# some lower rank diffs still missing
assert not FileDiff.query.filter_by(version=24, rank=1).count()

# diff for v17-v32 with merged diffs, this will also create lower missing ranks
diff = FileDiff(
basefile=basefile, path=f"test.gpkg-diff-{uuid.uuid4()}", version=32, rank=2
)
db.session.add(diff)
db.session.commit()
diff.construct_checkpoint()
assert os.path.exists(diff.abs_path)
lower_diff = FileDiff.query.filter_by(version=24, rank=1).first()
assert os.path.exists(lower_diff.abs_path)

# assert gpkg diff is the same as it would be from merging all individual diffs
individual_diffs = (
FileDiff.query.filter_by(file_path_id=file_path_id, rank=0)
Expand All @@ -368,11 +354,12 @@ def test_create_diff_checkpoint(diff_project):

# geodiff failure
mock.side_effect = GeoDiffLibError
diff = FileDiff(
basefile=basefile, path=f"test.gpkg-diff-{uuid.uuid4()}", version=16, rank=1
)
db.session.add(diff)
db.session.commit()
diff = FileDiff.query.filter_by(
file_path_id=file_path_id, version=16, rank=1
).first()
assert os.path.exists(diff.abs_path)
os.remove(diff.abs_path)

diff.construct_checkpoint()
assert mock.called
assert not os.path.exists(diff.abs_path)
Expand Down
Loading