From 714493b59780a04bccd9f779fd60c0c5218d50a9 Mon Sep 17 00:00:00 2001 From: Rob McMullen Date: Sun, 24 Jun 2018 18:40:16 -0700 Subject: [PATCH] Refs #1: added gzip container as reference --- atrcopy/__init__.py | 7 +++++-- atrcopy/container.py | 24 ++++++++++++++++++++++-- atrcopy/dcm.py | 2 +- atrcopy/parsers.py | 10 ++++++++-- 4 files changed, 36 insertions(+), 7 deletions(-) diff --git a/atrcopy/__init__.py b/atrcopy/__init__.py index 23c9cb7..bd7b59f 100644 --- a/atrcopy/__init__.py +++ b/atrcopy/__init__.py @@ -58,13 +58,16 @@ def find_diskimage(filename): with open(filename, "rb") as fh: if options.verbose: print("Loading file %s" % filename) - rawdata = SegmentData(fh.read()) + data = to_numpy(fh.read()) parser = None try: - unpacked = guess_container(rawdata.data) + container = guess_container(data, options.verbose) + if container is not None: + data = container.unpacked except errors.UnsupportedContainer as e: print(f"{filename}: {e}") return None + rawdata = SegmentData(data) for mime in mime_parse_order: if options.verbose: print("Trying MIME type %s" % mime) diff --git a/atrcopy/container.py b/atrcopy/container.py index 338c4a5..330f25d 100644 --- a/atrcopy/container.py +++ b/atrcopy/container.py @@ -1,12 +1,32 @@ +import gzip +import io + import numpy as np from . import errors from .segments import SegmentData +from .utils import to_numpy class DiskImageContainer: def __init__(self, data): - self.unpacked = self.unpack_raw_data(data) + self.unpacked = self.__unpack_raw_data(data) - def unpack_raw_data(self, data): + def __unpack_raw_data(self, data): + raw = data.tobytes() + unpacked = self.unpack_bytes(raw) + return to_numpy(unpacked) + + def unpack_bytes(self, byte_data): pass + + +class GZipContainer(DiskImageContainer): + def unpack_bytes(self, byte_data): + try: + buf = io.BytesIO(byte_data) + with gzip.GzipFile(mode='rb', fileobj=buf) as f: + unpacked = f.read() + except OSError as e: + raise errors.InvalidContainer(e) + return unpacked diff --git a/atrcopy/dcm.py b/atrcopy/dcm.py index 6cd326f..0982f77 100644 --- a/atrcopy/dcm.py +++ b/atrcopy/dcm.py @@ -21,7 +21,7 @@ class DCMContainer(DiskImageContainer): self.index += 1 return data - def unpack_raw_data(self, data): + def unpack_bytes(self, data): self.index = 0 self.count = len(data) self.raw = data diff --git a/atrcopy/parsers.py b/atrcopy/parsers.py index 00cac65..69934a9 100644 --- a/atrcopy/parsers.py +++ b/atrcopy/parsers.py @@ -10,6 +10,7 @@ from .dos33 import Dos33DiskImage, ProdosDiskImage, Dos33BinFile from .standard_delivery import StandardDeliveryImage from . import errors from .magic import guess_detail_for_mime +from .container import GZipContainer from .dcm import DCMContainer import logging @@ -159,17 +160,22 @@ class ProdosSegmentParser(SegmentParser): known_containers = [ + GZipContainer, DCMContainer, ] -def guess_container(r): +def guess_container(r, verbose=False): for c in known_containers: + if verbose: + log.info(f"trying container {c}") try: found = c(r) - except errors.InvalidContainer: + except errors.InvalidContainer as e: continue else: + if verbose: + log.info(f"found container {c}") return found return None