Commit c9611c03 authored by anarcat's avatar anarcat
Browse files

Merge branch 'obtuse' into 'main'

clarify a bit of obtuse logic

See merge request !12
parents 4124459e 0208ebba
Pipeline #20621 passed with stage
in 23 seconds
......@@ -389,106 +389,61 @@ class ProcessingClient(wc.Client):
logging.info("skipping non-existent resource %s/%s: %s", folder, path, e)
return
if path.endswith("/") and len(listing) <= 1:
logging.info("skipping empty folder: %s/%s", folder, path)
return
logging.info("sanitizing %s %s", folder, path)
# non-empty folder or regular file
if len(listing) > 1 or not path.endswith("/"):
remote_processing_path = join(folder, "dangerzone/processing", path)
logging.info(
"moving %s to %s before processing", path, remote_processing_path
)
if not self.dryrun:
self.mkdir(join(folder, "dangerzone"))
self.mkdir(join(folder, "dangerzone/processing"))
try:
self.move(
remote_path_from=join(folder, path),
remote_path_to=remote_processing_path,
)
except ResponseErrorCode as e:
# https://datatracker.ietf.org/doc/html/rfc7232#section-4.2
# actually used in WebDAV to show the file already exists
if e.code == 412:
logging.warning("file already being processed, skipping")
return
with tempfile.TemporaryDirectory() as tmpdir:
# TODO: sanitize path for local use
local_path = join(tmpdir, "danger", path)
# 3. download the file locally
logging.info("downloading %s to %s", remote_processing_path, local_path)
if not self.dryrun:
os.mkdir(join(tmpdir, "danger"))
self.download_sync(
remote_path=remote_processing_path, local_path=local_path
)
# 4. process the file with the dangerzone-converter
try:
if os.path.isdir(local_path):
self.sanitizer.sanitize_dir(local_path)
else:
self.sanitizer.sanitize_file(local_path)
except Sanitizer.ParseError:
# TODO: error handling:
# 5. on failure, delete the failed file locally, and
# move it to a dangerzone/rejected folder remotely
remote_rejected_path = join(folder, "dangerzone/rejected", path)
logging.warning(
"cannot process %s (%s), moving to %s",
path,
local_path,
remote_rejected_path,
)
if not self.dryrun:
self.mkdir(join(folder, "dangerzone/rejected"))
try:
self.move(
remote_path_from=remote_processing_path,
remote_path_to=remote_rejected_path,
)
except ResponseErrorCode as e:
# https://datatracker.ietf.org/doc/html/rfc7232#section-4.2
# actually used in WebDAV to show the file already exists
if e.code == 412:
# rejected already exists, fall back
# to delete the processing version
# altogether
#
# XXX: we actually lose data here
# which isn't nice. maybe we should
# find a unique filename instead?
self.clean(remote_processing_path)
remote_processing_path = join(folder, "dangerzone/processing", path)
logging.info("moving %s to %s before processing", path, remote_processing_path)
if not self.dryrun:
self.mkdir(join(folder, "dangerzone"))
self.mkdir(join(folder, "dangerzone/processing"))
try:
self.move(
remote_path_from=join(folder, path),
remote_path_to=remote_processing_path,
)
except ResponseErrorCode as e:
# https://datatracker.ietf.org/doc/html/rfc7232#section-4.2
# actually used in WebDAV to show the file already exists
if e.code == 412:
logging.warning("file already being processed, skipping")
return
# 6. on success, upload the sanitized file to a safe/
# folder, move the original to dangerzone/processed
remote_safe_directory = join(folder, "safe")
remote_safe_path = join(remote_safe_directory, path)
local_safe_path = join(self.sanitizer.safe_dir, path)
logging.info(
f"recursively creating remote safe directory {remote_safe_directory}"
with tempfile.TemporaryDirectory() as tmpdir:
# TODO: sanitize path for local use
local_path = join(tmpdir, "danger", path)
# 3. download the file locally
logging.info("downloading %s to %s", remote_processing_path, local_path)
if not self.dryrun:
os.mkdir(join(tmpdir, "danger"))
self.download_sync(
remote_path=remote_processing_path, local_path=local_path
)
if not self.dryrun:
self.mkdir_recursive(remote_safe_directory)
logging.info("uploading %s to %s", local_safe_path, remote_safe_path)
if not self.dryrun:
self.upload_sync(
# does that work when safe/ already has stuff?
remote_path=remote_safe_path,
local_path=local_safe_path,
)
remote_processed_path = join(folder, "dangerzone/processed", path)
logging.info(
"renaming %s to %s",
remote_processing_path,
remote_processed_path,
# 4. process the file with the dangerzone-converter
try:
if os.path.isdir(local_path):
self.sanitizer.sanitize_dir(local_path)
else:
self.sanitizer.sanitize_file(local_path)
except Sanitizer.ParseError:
# TODO: error handling:
# 5. on failure, delete the failed file locally, and
# move it to a dangerzone/rejected folder remotely
remote_rejected_path = join(folder, "dangerzone/rejected", path)
logging.warning(
"cannot process %s (%s), moving to %s",
path,
local_path,
remote_rejected_path,
)
if not self.dryrun:
# TODO: turn "dangerzone/processed" into a constant
self.mkdir(join(folder, "dangerzone/processed"))
self.mkdir(join(folder, "dangerzone/rejected"))
try:
self.move(
remote_path_from=remote_processing_path,
remote_path_to=remote_processed_path,
remote_path_to=remote_rejected_path,
)
except ResponseErrorCode as e:
# https://datatracker.ietf.org/doc/html/rfc7232#section-4.2
......@@ -502,6 +457,51 @@ class ProcessingClient(wc.Client):
# which isn't nice. maybe we should
# find a unique filename instead?
self.clean(remote_processing_path)
return
# 6. on success, upload the sanitized file to a safe/
# folder, move the original to dangerzone/processed
remote_safe_directory = join(folder, "safe")
remote_safe_path = join(remote_safe_directory, path)
local_safe_path = join(self.sanitizer.safe_dir, path)
logging.info(
f"recursively creating remote safe directory {remote_safe_directory}"
)
if not self.dryrun:
self.mkdir_recursive(remote_safe_directory)
logging.info("uploading %s to %s", local_safe_path, remote_safe_path)
if not self.dryrun:
self.upload_sync(
# does that work when safe/ already has stuff?
remote_path=remote_safe_path,
local_path=local_safe_path,
)
remote_processed_path = join(folder, "dangerzone/processed", path)
logging.info(
"renaming %s to %s",
remote_processing_path,
remote_processed_path,
)
if not self.dryrun:
# TODO: turn "dangerzone/processed" into a constant
self.mkdir(join(folder, "dangerzone/processed"))
try:
self.move(
remote_path_from=remote_processing_path,
remote_path_to=remote_processed_path,
)
except ResponseErrorCode as e:
# https://datatracker.ietf.org/doc/html/rfc7232#section-4.2
# actually used in WebDAV to show the file already exists
if e.code == 412:
# rejected already exists, fall back
# to delete the processing version
# altogether
#
# XXX: we actually lose data here
# which isn't nice. maybe we should
# find a unique filename instead?
self.clean(remote_processing_path)
if __name__ == "__main__":
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment