mirror of
https://github.com/robmcmullen/atrcopy.git
synced 2025-04-10 15:39:17 +00:00
In progress change to entry point containers, parsers
This commit is contained in:
parent
3cc72e0ad9
commit
b35361b125
@ -17,10 +17,12 @@ from . import errors
|
||||
from .ataridos import AtrHeader, AtariDosDiskImage, BootDiskImage, AtariDosFile, XexContainerSegment, get_xex, add_atr_header
|
||||
from .dos33 import Dos33DiskImage
|
||||
from .kboot import KBootImage, add_xexboot_header
|
||||
from .segments import SegmentData, SegmentSaver, DefaultSegment, EmptySegment, ObjSegment, RawSectorsSegment, SegmentedFileSegment, user_bit_mask, match_bit_mask, comment_bit_mask, data_style, selected_bit_mask, diff_bit_mask, not_user_bit_mask, interleave_segments, SegmentList, get_style_mask, get_style_bits
|
||||
from .segments import SegmentData, SegmentSaver, DefaultSegment, EmptySegment, ObjSegment, RawSectorsSegment, SegmentedFileSegment, interleave_segments, SegmentList
|
||||
from .style_bits import user_bit_mask, match_bit_mask, comment_bit_mask, data_style, selected_bit_mask, diff_bit_mask, not_user_bit_mask, get_style_mask, get_style_bits
|
||||
from .spartados import SpartaDosDiskImage
|
||||
from .cartridge import A8CartHeader, AtariCartImage, RomImage
|
||||
from .parsers import SegmentParser, DefaultSegmentParser, guess_parser_by_size, guess_parser_for_mime, guess_parser_for_system, guess_container, iter_parsers, iter_known_segment_parsers, mime_parse_order, parsers_for_filename
|
||||
from .parser import SegmentParser, DefaultSegmentParser, guess_parser_by_size, guess_parser_for_mime, guess_parser_for_system, iter_parsers, iter_known_segment_parsers, mime_parse_order, parsers_for_filename
|
||||
from .container import guess_container
|
||||
from .magic import guess_detail_for_mime
|
||||
from .utils import to_numpy, text_to_int
|
||||
from .dummy import LocalFilesystem
|
||||
|
@ -2,7 +2,8 @@ import numpy as np
|
||||
|
||||
from . import errors
|
||||
from .diskimages import DiskImageBase, BaseHeader, Bootable
|
||||
from .segments import SegmentData, EmptySegment, ObjSegment, RawSectorsSegment, DefaultSegment, SegmentedFileSegment, SegmentSaver, get_style_bits
|
||||
from .segments import SegmentData, EmptySegment, ObjSegment, RawSectorsSegment, DefaultSegment, SegmentedFileSegment, SegmentSaver
|
||||
from .style_bits import get_style_bits
|
||||
from .utils import *
|
||||
from .executables import get_xex
|
||||
|
||||
|
@ -1,33 +1,112 @@
|
||||
import gzip
|
||||
import bz2
|
||||
import lzma
|
||||
import io
|
||||
import hashlib
|
||||
import inspect
|
||||
import pkg_resources
|
||||
|
||||
import numpy as np
|
||||
|
||||
from . import errors
|
||||
from .segments import SegmentData
|
||||
from .utils import to_numpy
|
||||
from . import style_bits
|
||||
from .utils import to_numpy, to_numpy_list, uuid
|
||||
|
||||
import logging
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class DiskImageContainer:
|
||||
"""Unpacker for disk image compression.
|
||||
"""Disk image data storage and unpacker for disk image compression.
|
||||
|
||||
Disk images may be compressed by any number of techniques. Subclasses of
|
||||
DiskImageContainer implement the `unpack_bytes` method which examines the
|
||||
byte_data argument for the supported compression type, and if valid returns
|
||||
the unpacked bytes to be used in the disk image parsing.
|
||||
Segments point to this container and refer to the container's data rather
|
||||
than store copies.
|
||||
|
||||
Disk images may be stored as raw data or can be compressed by any number of
|
||||
techniques. Subclasses of DiskImageContainer implement the `unpack_bytes`
|
||||
method which examines the byte_data argument for the supported compression
|
||||
type, and if valid returns the unpacked bytes to be used in the disk image
|
||||
parsing.
|
||||
"""
|
||||
def __init__(self, data):
|
||||
self.unpacked = self.__unpack_raw_data(data)
|
||||
can_resize_default = False
|
||||
|
||||
def __unpack_raw_data(self, data):
|
||||
raw = data.tobytes()
|
||||
base_serializable_attributes = ['origin', 'error', 'name', 'verbose_name', 'uuid', 'can_resize']
|
||||
extra_serializable_attributes = []
|
||||
|
||||
def __init__(self, data, style=None, origin=0, name="All", error=None, verbose_name=None, memory_map=None):
|
||||
self._data = None
|
||||
self._style = None
|
||||
self.set_data(data, style)
|
||||
|
||||
self.origin = int(origin) # force python int to decouple from possibly being a numpy datatype
|
||||
self.error = error
|
||||
self.name = name
|
||||
self.verbose_name = verbose_name
|
||||
self.uuid = uuid()
|
||||
if memory_map is None:
|
||||
memory_map = {}
|
||||
self.memory_map = memory_map
|
||||
self.comments = dict()
|
||||
self.user_data = dict()
|
||||
for i in range(1, style_bits.user_bit_mask):
|
||||
self.user_data[i] = dict()
|
||||
|
||||
# Some segments may be resized to contain additional segments not
|
||||
# present when the segment was created.
|
||||
self.can_resize = self.__class__.can_resize_default
|
||||
|
||||
#### initialization
|
||||
|
||||
def set_data(self, data, style):
|
||||
self.data = data
|
||||
self.style = style
|
||||
|
||||
#### properties
|
||||
|
||||
@property
|
||||
def data(self):
|
||||
return self._data
|
||||
|
||||
@data.setter
|
||||
def data(self, value):
|
||||
if self._data is not None:
|
||||
raise errors.ReadOnlyContainer("Container already populated with data")
|
||||
raw = value.tobytes()
|
||||
try:
|
||||
unpacked = self.unpack_bytes(raw)
|
||||
except EOFError as e:
|
||||
raise errors.InvalidContainer(e)
|
||||
return to_numpy(unpacked)
|
||||
self._data = to_numpy(unpacked)
|
||||
|
||||
@property
|
||||
def style(self):
|
||||
return self._style
|
||||
|
||||
@style.setter
|
||||
def style(self, value):
|
||||
if value is None:
|
||||
value = np.zeros(len(self._data), dtype=np.uint8)
|
||||
self._style = to_numpy(value)
|
||||
|
||||
@property
|
||||
def sha1(self):
|
||||
return hashlib.sha1(self.data).digest()
|
||||
|
||||
#### dunder methods
|
||||
|
||||
def __len__(self):
|
||||
return np.alen(self._data)
|
||||
|
||||
def __and__(self, other):
|
||||
return self._data & other
|
||||
|
||||
def __iand__(self, other):
|
||||
self._data &= other
|
||||
return self
|
||||
|
||||
def __getitem__(self, index):
|
||||
return self._data[index]
|
||||
|
||||
def __setitem__(self, index, value):
|
||||
self._data[index] = value
|
||||
|
||||
#### unpacking
|
||||
|
||||
def unpack_bytes(self, byte_data):
|
||||
"""Attempt to unpack `byte_data` using this unpacking algorithm.
|
||||
@ -46,37 +125,148 @@ class DiskImageContainer:
|
||||
the data was indeed recognized by this subclass (despite not being
|
||||
unpacked) and checking further containers is not necessary.
|
||||
"""
|
||||
pass
|
||||
return byte_data
|
||||
|
||||
#### packing
|
||||
|
||||
def pack_data(self, np_data):
|
||||
"""Pack `np_data` using this packing algorithm
|
||||
|
||||
`np_data` is numpy data, as this function is expected to be called from
|
||||
the data held in a SourceSegment
|
||||
"""
|
||||
return np_data
|
||||
|
||||
#### serialization
|
||||
|
||||
def __getstate__(self):
|
||||
"""Custom jsonpickle state save routine
|
||||
|
||||
This routine culls down the list of attributes that should be
|
||||
serialized, and in some cases changes their format slightly so they
|
||||
have a better mapping to json objects. For instance, json can't handle
|
||||
dicts with integer keys, so dicts are turned into lists of lists.
|
||||
Tuples are also turned into lists because tuples don't have a direct
|
||||
representation in json, while lists have a compact representation in
|
||||
json.
|
||||
"""
|
||||
state = dict()
|
||||
for key in self.base_serializable_attributes:
|
||||
state[key] = getattr(self, key)
|
||||
for key in self.extra_serializable_attributes:
|
||||
state[key] = getattr(self, key)
|
||||
r = self.rawdata
|
||||
state['memory_map'] = sorted([list(i) for i in self.memory_map.items()])
|
||||
state['comment ranges'] = [list(a) for a in self.get_style_ranges(comment=True)]
|
||||
state['data ranges'] = [list(a) for a in self.get_style_ranges(data=True)]
|
||||
for i in range(1, style_bits.user_bit_mask):
|
||||
r = [list(a) for a in self.get_style_ranges(user=i)]
|
||||
if r:
|
||||
slot = "user style %d" % i
|
||||
state[slot] = r
|
||||
|
||||
# json serialization doesn't allow int keys, so convert to list of
|
||||
# pairs
|
||||
state['comments'] = self.get_sorted_comments()
|
||||
return state
|
||||
|
||||
def __setstate__(self, state):
|
||||
"""Custom jsonpickle state restore routine
|
||||
|
||||
The use of jsonpickle to recreate objects doesn't go through __init__,
|
||||
so there will be missing attributes when restoring old versions of the
|
||||
json. Once a version gets out in the wild and additional attributes are
|
||||
added to a segment, a default value should be applied here.
|
||||
"""
|
||||
self.memory_map = dict(state.pop('memory_map', []))
|
||||
self.uuid = state.pop('uuid', uuid())
|
||||
self.can_resize = state.pop('can_resize', self.__class__.can_resize_default)
|
||||
comments = state.pop('comments', {})
|
||||
for k, v in e['comments']:
|
||||
self.comments[k] = v
|
||||
ranges = state.pop('comment ranges')
|
||||
if 'comment ranges' in e:
|
||||
self.set_style_ranges(e['comment ranges'], comment=True)
|
||||
if 'data ranges' in e:
|
||||
self.set_style_ranges(e['data ranges'], user=data_style)
|
||||
if 'display list ranges' in e:
|
||||
# DEPRECATED, but supported on read. Converts display list to
|
||||
# disassembly type 0 for user index 1
|
||||
self.set_style_ranges(e['display list ranges'], data=True, user=1)
|
||||
self.set_user_data(e['display list ranges'], 1, 0)
|
||||
if 'user ranges 1' in e:
|
||||
# DEPRECATED, but supported on read. Converts user extra data 0
|
||||
# (antic dl), 1 (jumpman level), and 2 (jumpman harvest) to user
|
||||
# styles 2, 3, and 4. Data is now user style 1.
|
||||
for r, val in e['user ranges 1']:
|
||||
self.set_style_ranges([r], user=val + 2)
|
||||
for i in range(1, style_bits.user_bit_mask):
|
||||
slot = "user style %d" % i
|
||||
if slot in e:
|
||||
self.set_style_ranges(e[slot], user=i)
|
||||
self.restore_missing_serializable_defaults()
|
||||
self.__dict__.update(state)
|
||||
self.restore_renamed_serializable_attributes()
|
||||
|
||||
#### style
|
||||
|
||||
def set_style_at_indexes(self, indexes, **kwargs):
|
||||
style_bits = get_style_bits(**kwargs)
|
||||
self._style[indexes] |= style_bits
|
||||
|
||||
def clear_style_at_indexes(self, indexes, **kwargs):
|
||||
style_mask = get_style_mask(**kwargs)
|
||||
self.style[indexes] &= style_mask
|
||||
|
||||
def get_style_at_indexes(self, **kwargs):
|
||||
"""Return a list of start, end pairs that match the specified style
|
||||
"""
|
||||
style_bits = self.get_style_bits(**kwargs)
|
||||
matches = (self._style & style_bits) == style_bits
|
||||
return self.bool_to_ranges(matches)
|
||||
|
||||
def fixup_comments(self):
|
||||
"""Remove any style bytes that are marked as commented but have no
|
||||
comment, and add any style bytes where there's a comment but it isn't
|
||||
marked in the style data.
|
||||
|
||||
This happens on the base data, so only need to do this on one segment
|
||||
that uses this base data.
|
||||
"""
|
||||
style_base = self.rawdata.style_base
|
||||
comment_text_indexes = np.asarray(list(self.rawdata.extra.comments.keys()), dtype=np.uint32)
|
||||
comment_mask = self.get_style_mask(comment=True)
|
||||
has_comments = np.where(style_base & style_bits.comment_bit_mask > 0)[0]
|
||||
both = np.intersect1d(comment_text_indexes, has_comments)
|
||||
log.info("fixup comments: %d correctly marked, %d without style, %d empty text" % (np.alen(both), np.alen(comment_text_indexes) - np.alen(both), np.alen(has_comments) - np.alen(both)))
|
||||
style_base &= comment_mask
|
||||
comment_style = self.get_style_bits(comment=True)
|
||||
style_base[comment_text_indexes] |= comment_style
|
||||
|
||||
|
||||
class GZipContainer(DiskImageContainer):
|
||||
def unpack_bytes(self, byte_data):
|
||||
def find_containers():
    """Collect the container classes advertised through entry points.

    Every entry point registered in the ``atrcopy.containers`` group is
    loaded as a module, and each class found in that module that has
    `DiskImageContainer` somewhere in its ancestry is collected.

    Returns:
        list of classes, in the order the entry points and module members
        were enumerated.
    """
    containers = []
    for entry_point in pkg_resources.iter_entry_points('atrcopy.containers'):
        mod = entry_point.load()
        log.debug(f"find_container: Found module {entry_point.name}={mod.__name__}")
        for name, obj in inspect.getmembers(mod):
            # Skipping the first MRO entry (the class itself) means that
            # DiskImageContainer, when merely imported into the module, is
            # not reported as a container of its own.
            if inspect.isclass(obj) and DiskImageContainer in obj.__mro__[1:]:
                log.debug(f"find_containers: found container class {name}")
                containers.append(obj)
    return containers
|
||||
|
||||
|
||||
def guess_container(r, verbose=False):
|
||||
for c in find_containers():
|
||||
if verbose:
|
||||
log.info(f"trying container {c}")
|
||||
try:
|
||||
buf = io.BytesIO(byte_data)
|
||||
with gzip.GzipFile(mode='rb', fileobj=buf) as f:
|
||||
unpacked = f.read()
|
||||
except OSError as e:
|
||||
raise errors.InvalidContainer(e)
|
||||
return unpacked
|
||||
|
||||
|
||||
class BZipContainer(DiskImageContainer):
|
||||
def unpack_bytes(self, byte_data):
|
||||
try:
|
||||
buf = io.BytesIO(byte_data)
|
||||
with bz2.BZ2File(buf, mode='rb') as f:
|
||||
unpacked = f.read()
|
||||
except OSError as e:
|
||||
raise errors.InvalidContainer(e)
|
||||
return unpacked
|
||||
|
||||
|
||||
class LZMAContainer(DiskImageContainer):
|
||||
def unpack_bytes(self, byte_data):
|
||||
try:
|
||||
buf = io.BytesIO(byte_data)
|
||||
with lzma.LZMAFile(buf, mode='rb') as f:
|
||||
unpacked = f.read()
|
||||
except lzma.LZMAError as e:
|
||||
raise errors.InvalidContainer(e)
|
||||
return unpacked
|
||||
found = c(r)
|
||||
except errors.InvalidContainer as e:
|
||||
continue
|
||||
else:
|
||||
if verbose:
|
||||
log.info(f"found container {c}")
|
||||
return found
|
||||
log.info(f"image does not appear to be compressed.")
|
||||
return DiskImageContainer(r)
|
||||
|
0
atrcopy/containers/__init__.py
Normal file
0
atrcopy/containers/__init__.py
Normal file
18
atrcopy/containers/bzip.py
Normal file
18
atrcopy/containers/bzip.py
Normal file
@ -0,0 +1,18 @@
|
||||
import bz2
import io

import numpy as np

# This module lives in the ``atrcopy.containers`` subpackage, so the shared
# modules are one level up.
from .. import errors
from ..container import DiskImageContainer
from ..utils import to_numpy


class BZipContainer(DiskImageContainer):
    """Container whose raw bytes are a bzip2-compressed stream."""

    def unpack_bytes(self, byte_data):
        """Decompress `byte_data` as bzip2 and return the unpacked bytes.

        Raises:
            errors.InvalidContainer: if the bz2 reader reports an `OSError`
                while reading the stream.
        """
        try:
            buf = io.BytesIO(byte_data)
            with bz2.BZ2File(buf, mode='rb') as f:
                unpacked = f.read()
        except OSError as e:
            raise errors.InvalidContainer(e) from e
        return unpacked
|
@ -1,8 +1,7 @@
|
||||
import numpy as np
|
||||
|
||||
from . import errors
|
||||
from .container import DiskImageContainer
|
||||
from .segments import SegmentData
|
||||
from .. import errors
|
||||
from ..container import DiskImageContainer
|
||||
|
||||
|
||||
class DCMContainer(DiskImageContainer):
|
18
atrcopy/containers/gzip.py
Normal file
18
atrcopy/containers/gzip.py
Normal file
@ -0,0 +1,18 @@
|
||||
import gzip
import io
import zlib

import numpy as np

# This module lives in the ``atrcopy.containers`` subpackage, so the shared
# modules are one level up.
from .. import errors
from ..container import DiskImageContainer
from ..utils import to_numpy


class GZipContainer(DiskImageContainer):
    """Container whose raw bytes are a gzip-compressed stream."""

    def unpack_bytes(self, byte_data):
        """Decompress `byte_data` as gzip and return the unpacked bytes.

        Raises:
            errors.InvalidContainer: if the data is not a gzip stream
                (`OSError`, which includes `gzip.BadGzipFile`) or the
                compressed payload is corrupt (`zlib.error`).
        """
        try:
            buf = io.BytesIO(byte_data)
            with gzip.GzipFile(mode='rb', fileobj=buf) as f:
                unpacked = f.read()
        except (OSError, zlib.error) as e:
            # zlib.error is not an OSError subclass; without catching it a
            # damaged gzip payload would escape as a raw zlib exception.
            raise errors.InvalidContainer(e) from e
        return unpacked
|
18
atrcopy/containers/lzma.py
Normal file
18
atrcopy/containers/lzma.py
Normal file
@ -0,0 +1,18 @@
|
||||
import lzma
import io

import numpy as np

# This module lives in the ``atrcopy.containers`` subpackage, so the shared
# modules are one level up.
from .. import errors
from ..container import DiskImageContainer
from ..utils import to_numpy


class LZMAContainer(DiskImageContainer):
    """Container whose raw bytes are an LZMA/XZ-compressed stream."""

    def unpack_bytes(self, byte_data):
        """Decompress `byte_data` with lzma and return the unpacked bytes.

        Raises:
            errors.InvalidContainer: if the lzma reader raises
                `lzma.LZMAError` while reading the stream.
        """
        try:
            buf = io.BytesIO(byte_data)
            with lzma.LZMAFile(buf, mode='rb') as f:
                unpacked = f.read()
        except lzma.LZMAError as e:
            raise errors.InvalidContainer(e) from e
        return unpacked
|
@ -1,7 +1,7 @@
|
||||
import numpy as np
|
||||
|
||||
from . import errors
|
||||
from .segments import SegmentData, EmptySegment, ObjSegment, RawSectorsSegment
|
||||
from .segments import EmptySegment, ObjSegment, RawSectorsSegment
|
||||
from .utils import *
|
||||
from .executables import create_executable_file_data
|
||||
|
||||
|
@ -3,7 +3,8 @@ import numpy as np
|
||||
from . import errors
|
||||
from .diskimages import BaseHeader, DiskImageBase, Bootable
|
||||
from .utils import Directory, VTOC, WriteableSector, BaseSectorList, Dirent
|
||||
from .segments import DefaultSegment, EmptySegment, ObjSegment, RawTrackSectorSegment, SegmentSaver, get_style_bits, SegmentData
|
||||
from .segments import DefaultSegment, EmptySegment, ObjSegment, RawTrackSectorSegment, SegmentSaver, SegmentData
|
||||
from .style_bits import get_style_bits
|
||||
from .executables import get_bsave
|
||||
|
||||
import logging
|
||||
|
@ -2,6 +2,14 @@ class AtrError(RuntimeError):
|
||||
pass
|
||||
|
||||
|
||||
class InvalidSegmentLength(AtrError):
    """Raised when a segment is given a start offset but no length.

    `IndexedSegment.set_offset_from_ints` raises this when its `length`
    argument is None.
    """
|
||||
|
||||
|
||||
class InvalidSegmentOrder(AtrError):
    """Raised when a segment's offsets cannot be mapped back one-to-one.

    `IndexedSegment.calc_reverse_offsets` raises this when the number of
    source positions covered by the segment differs from the segment length.
    """
|
||||
|
||||
|
||||
class InvalidAtrHeader(AtrError):
    """Error type for an invalid ATR header.

    NOTE(review): the raise sites are not visible from this module; confirm
    the exact conditions against the header-parsing code.
    """
|
||||
|
||||
@ -71,5 +79,9 @@ class UnsupportedContainer(AtrError):
|
||||
pass
|
||||
|
||||
|
||||
class ReadOnlyContainer(AtrError):
    """Raised when data is assigned to a container that already holds data."""
|
||||
|
||||
|
||||
class InvalidContainer(AtrError):
    """Raised when bytes cannot be unpacked by a particular container type."""
|
||||
|
@ -1,7 +1,8 @@
|
||||
import numpy as np
|
||||
|
||||
from . import errors
|
||||
from .segments import SegmentData, EmptySegment, ObjSegment, RawSectorsSegment, DefaultSegment, SegmentedFileSegment, SegmentSaver, get_style_bits
|
||||
from .segments import SegmentData, EmptySegment, ObjSegment, RawSectorsSegment, DefaultSegment, SegmentedFileSegment, SegmentSaver
|
||||
from .style_bits import get_style_bits
|
||||
from .utils import *
|
||||
|
||||
import logging
|
||||
|
@ -1,695 +0,0 @@
|
||||
import bisect
|
||||
import io
|
||||
|
||||
import numpy as np
|
||||
|
||||
from . import errors
|
||||
from .utils import to_numpy, to_numpy_list, uuid
|
||||
from functools import reduce
|
||||
|
||||
# Layout of one style byte: the low three bits carry a "user" style value,
# the remaining named bits are independent boolean flags.
user_bit_mask = 0x07
data_style = 0x1
not_user_bit_mask = 0xff ^ user_bit_mask
diff_bit_mask = 0x10
match_bit_mask = 0x20
comment_bit_mask = 0x40
selected_bit_mask = 0x80

import logging
log = logging.getLogger(__name__)


def get_style_bits(match=False, comment=False, selected=False, data=False, diff=False, user=0):
    """ Return an int value that contains the specified style bits set.

    Available styles for each byte are:

    match: part of the currently matched search
    comment: user commented area
    selected: selected region
    data: labeled in the disassembler as a data region (i.e. not disassembled)
    diff: sets `diff_bit_mask`
    user: integer user style; only the bits inside `user_bit_mask` are kept

    `data` and `user` share the low bits of the style byte: `data` ORs in
    `data_style`, so combining it with a `user` value merges the two.
    """
    style_bits = 0
    if user:
        style_bits |= (user & user_bit_mask)
    if diff:
        style_bits |= diff_bit_mask
    if match:
        style_bits |= match_bit_mask
    if comment:
        style_bits |= comment_bit_mask
    if data:
        style_bits |= (data_style & user_bit_mask)
    if selected:
        style_bits |= selected_bit_mask
    return style_bits


def get_style_mask(**kwargs):
    """Get the bit mask that, when anded with data, will turn off the
    selected bits

    Accepts the same keyword arguments as `get_style_bits`. When a truthy
    `user` is given, the whole user field is cleared by the mask; otherwise
    the user field is left untouched (so `data=True` on its own yields a
    mask that clears nothing in the user field).
    """
    bits = get_style_bits(**kwargs)
    if kwargs.get('user'):
        bits |= user_bit_mask
    else:
        bits &= (0xff ^ user_bit_mask)
    return 0xff ^ bits
|
||||
|
||||
|
||||
class SourceSegment:
    """Holder of a block of byte data and a parallel array of style bytes.

    The data and style arrays are stored as `_data` and `_style` and exposed
    read-only through the `data` and `style` properties. Indexing and the
    ``&`` operators act on the data array, not on the style array.

    NOTE(review): several methods call helpers that are not defined in this
    class (`get_style_ranges`, `set_style_ranges`, `get_sorted_comments`,
    `set_user_data`, `bool_to_ranges`, `rawdata`, `get_style_bits`,
    `get_style_mask`, and the `restore_*` hooks). They are presumably meant
    to be supplied as this in-progress refactoring continues; confirm before
    relying on serialization or the style query methods.
    """
    can_resize_default = False

    base_serializable_attributes = ['origin', 'error', 'name', 'verbose_name', 'uuid', 'can_resize']
    extra_serializable_attributes = []

    def __init__(self, data, style=None, origin=0, name="All", error=None, verbose_name=None, memory_map=None):
        """Create a segment around `data`.

        `style` defaults to an all-zero uint8 array of the same length as the
        data; `memory_map` defaults to a new empty dict.
        """
        self.origin = int(origin)  # force python int to decouple from possibly being a numpy datatype
        self.error = error
        self.name = name
        self.verbose_name = verbose_name
        self.uuid = uuid()
        if memory_map is None:
            memory_map = {}
        self.memory_map = memory_map
        self.comments = dict()
        self.user_data = dict()
        for i in range(1, user_bit_mask):
            self.user_data[i] = dict()

        # Some segments may be resized to contain additional segments not
        # present when the segment was created.
        self.can_resize = self.__class__.can_resize_default

        self.set_data(data, style)

    def set_data(self, data, style):
        """Store `data` and `style` after passing each through `to_numpy`.

        A `style` of None is replaced by zeros matching the data length.
        """
        self._data = to_numpy(data)
        if style is None:
            style = np.zeros(len(self._data), dtype=np.uint8)
        self._style = to_numpy(style)

    @property
    def data(self):
        return self._data

    @property
    def style(self):
        return self._style

    def __len__(self):
        # np.alen was removed from NumPy; len() gives the same result for
        # the array stored here.
        return len(self._data)

    def __and__(self, other):
        return self._data & other

    def __iand__(self, other):
        self._data &= other
        return self

    def __getitem__(self, index):
        return self._data[index]

    def __setitem__(self, index, value):
        self._data[index] = value

    #### serialization

    def __getstate__(self):
        """Custom jsonpickle state save routine

        This routine culls down the list of attributes that should be
        serialized, and in some cases changes their format slightly so they
        have a better mapping to json objects. For instance, json can't handle
        dicts with integer keys, so dicts are turned into lists of lists.
        Tuples are also turned into lists because tuples don't have a direct
        representation in json, while lists have a compact representation in
        json.
        """
        state = dict()
        for key in self.base_serializable_attributes:
            state[key] = getattr(self, key)
        for key in self.extra_serializable_attributes:
            state[key] = getattr(self, key)
        state['memory_map'] = sorted([list(i) for i in self.memory_map.items()])
        state['comment ranges'] = [list(a) for a in self.get_style_ranges(comment=True)]
        state['data ranges'] = [list(a) for a in self.get_style_ranges(data=True)]
        for i in range(1, user_bit_mask):
            r = [list(a) for a in self.get_style_ranges(user=i)]
            if r:
                slot = "user style %d" % i
                state[slot] = r

        # json serialization doesn't allow int keys, so convert to list of
        # pairs
        state['comments'] = self.get_sorted_comments()
        return state

    def __setstate__(self, state):
        """Custom jsonpickle state restore routine

        The use of jsonpickle to recreate objects doesn't go through __init__,
        so there will be missing attributes when restoring old versions of the
        json. Once a version gets out in the wild and additional attributes are
        added to a segment, a default value should be applied here.

        Keys that are handled explicitly are popped from `state`; whatever
        remains is copied straight into the instance ``__dict__``.
        """
        self.memory_map = dict(state.pop('memory_map', []))
        self.uuid = state.pop('uuid', uuid())
        self.can_resize = state.pop('can_resize', self.__class__.can_resize_default)

        # __init__ is bypassed on restore, so the comment store has to be
        # created here before it is filled from the list of pairs.
        self.comments = {}
        for k, v in state.pop('comments', []):
            self.comments[k] = v

        comment_ranges = state.pop('comment ranges', None)
        if comment_ranges is not None:
            self.set_style_ranges(comment_ranges, comment=True)
        data_ranges = state.pop('data ranges', None)
        if data_ranges is not None:
            self.set_style_ranges(data_ranges, user=data_style)
        display_list_ranges = state.pop('display list ranges', None)
        if display_list_ranges is not None:
            # DEPRECATED, but supported on read. Converts display list to
            # disassembly type 0 for user index 1
            self.set_style_ranges(display_list_ranges, data=True, user=1)
            self.set_user_data(display_list_ranges, 1, 0)
        user_ranges = state.pop('user ranges 1', None)
        if user_ranges is not None:
            # DEPRECATED, but supported on read. Converts user extra data 0
            # (antic dl), 1 (jumpman level), and 2 (jumpman harvest) to user
            # styles 2, 3, and 4. Data is now user style 1.
            for r, val in user_ranges:
                self.set_style_ranges([r], user=val + 2)
        for i in range(1, user_bit_mask):
            slot = "user style %d" % i
            slot_ranges = state.pop(slot, None)
            if slot_ranges is not None:
                self.set_style_ranges(slot_ranges, user=i)
        self.restore_missing_serializable_defaults()
        self.__dict__.update(state)
        self.restore_renamed_serializable_attributes()

    #### style

    def set_style_at_indexes(self, indexes, **kwargs):
        """Turn on the style bits described by `kwargs` at `indexes`."""
        style_bits = get_style_bits(**kwargs)
        self._style[indexes] |= style_bits

    def clear_style_at_indexes(self, indexes, **kwargs):
        """Turn off the style bits described by `kwargs` at `indexes`."""
        style_mask = get_style_mask(**kwargs)
        self.style[indexes] &= style_mask

    def get_style_at_indexes(self, **kwargs):
        """Return a list of start, end pairs that match the specified style
        """
        style_bits = self.get_style_bits(**kwargs)
        matches = (self._style & style_bits) == style_bits
        return self.bool_to_ranges(matches)

    def fixup_comments(self):
        """Remove any style bytes that are marked as commented but have no
        comment, and add any style bytes where there's a comment but it isn't
        marked in the style data.

        This happens on the base data, so only need to do this on one segment
        that uses this base data.
        """
        style_base = self.rawdata.style_base
        comment_text_indexes = np.asarray(list(self.rawdata.extra.comments.keys()), dtype=np.uint32)
        comment_mask = self.get_style_mask(comment=True)
        has_comments = np.where(style_base & comment_bit_mask > 0)[0]
        both = np.intersect1d(comment_text_indexes, has_comments)
        log.info("fixup comments: %d correctly marked, %d without style, %d empty text", len(both), len(comment_text_indexes) - len(both), len(has_comments) - len(both))
        # Clear every comment flag, then set it again only where comment
        # text actually exists.
        style_base &= comment_mask
        comment_style = self.get_style_bits(comment=True)
        style_base[comment_text_indexes] |= comment_style
|
||||
|
||||
|
||||
class ArrayWrapper:
    """Wrapper for numpy data so that manipulations can use normal numpy syntax
    and still affect the data according to the byte ordering.

    Numpy's fancy indexing can't be used for setting set values, so this
    intermediate layer is needed that defines the __setitem__ method that
    explicitly references the byte ordering in the data array.

    `data` is the underlying array and `order` is the list of positions in
    that array that this wrapper exposes, in wrapper order: element ``i`` of
    the wrapper is ``data[order[i]]``.
    """

    def __init__(self, data, order):
        self.np_data = data
        self.order = order

    def __str__(self):
        return f"ArrayWrapper at {hex(id(self))} count={len(self)} order={self.order}"

    def __len__(self):
        # np.alen was removed from NumPy; len() is equivalent for the index
        # sequence held here.
        return len(self.order)

    def __and__(self, other):
        return self.np_data[self.order] & other

    def __iand__(self, other):
        # Python expands this into get / in-place and / set on the indexed
        # elements, so the result is written back into the underlying array.
        self.np_data[self.order] &= other
        return self

    def __getitem__(self, index):
        return self.np_data[self.order[index]]

    def __setitem__(self, index, value):
        self.np_data[self.order[index]] = value
|
||||
|
||||
|
||||
|
||||
class IndexedSegment:
|
||||
can_resize_default = False
|
||||
|
||||
base_serializable_attributes = ['origin', 'error', 'name', 'verbose_name', 'uuid', 'can_resize']
|
||||
extra_serializable_attributes = []
|
||||
|
||||
def __init__(self, source_segment, offset_or_offset_list, origin=0, name="All", error=None, verbose_name=None, length=None):
|
||||
self.source_segment = source_segment
|
||||
try:
|
||||
start_offset = int(offset_or_offset_list)
|
||||
except TypeError:
|
||||
self.set_offset_from_list(offset_or_offset_list)
|
||||
else:
|
||||
self.set_offset_from_ints(offset_or_offset_list, length)
|
||||
self.verify_offsets()
|
||||
|
||||
self.origin = int(origin) # force python int to decouple from possibly being a numpy datatype
|
||||
self.error = error
|
||||
self.name = name
|
||||
self.verbose_name = verbose_name
|
||||
self.uuid = uuid()
|
||||
|
||||
# Some segments may be resized to contain additional segments not
|
||||
# present when the segment was created.
|
||||
self.can_resize = self.__class__.can_resize_default
|
||||
|
||||
#### properties
|
||||
|
||||
@property
|
||||
def data(self):
|
||||
return ArrayWrapper(self.source_segment._data, self.offset_into_source)
|
||||
|
||||
@property
|
||||
def style(self):
|
||||
return ArrayWrapper(self.source_segment._style, self.offset_into_source)
|
||||
|
||||
def __len__(self):
|
||||
return np.alen(self.offset_into_source)
|
||||
|
||||
#### dunder methods and convenience functions to operate on data (not style)
|
||||
|
||||
def __str__(self):
|
||||
if self.origin > 0:
|
||||
origin = " @ %04x" % (self.origin)
|
||||
else:
|
||||
origin = ""
|
||||
s = "%s ($%x bytes%s)" % (self.name, len(self), origin)
|
||||
if self.error:
|
||||
s += " " + self.error
|
||||
return s
|
||||
|
||||
def __and__(self, other):
|
||||
return self.source_segment._data[self.offset_into_source] & other
|
||||
|
||||
def __iand__(self, other):
|
||||
self.source_segment._data[self.offset_into_source] &= other
|
||||
return self
|
||||
|
||||
def __getitem__(self, index):
|
||||
return self.source_segment._data[self.offset_into_source[index]]
|
||||
|
||||
def __setitem__(self, index, value):
|
||||
self.source_segment._data[self.offset_into_source[index]] = value
|
||||
|
||||
#### offsets
|
||||
|
||||
def set_offset_from_list(self, offsets):
|
||||
self.offset_into_source = to_numpy_list(offsets)
|
||||
|
||||
def set_offset_from_ints(self, start, length):
|
||||
if length is None:
|
||||
raise errors.InvalidSegmentLength
|
||||
self.offset_into_source = np.arange(start, start + length, dtype=np.uint32)
|
||||
|
||||
def verify_offsets(self):
|
||||
self.enforce_offset_bounds()
|
||||
self.offset_from_source = self.calc_reverse_offsets()
|
||||
|
||||
def enforce_offset_bounds(self):
|
||||
self.offset_into_source = self.offset_into_source[self.offset_into_source < len(self.source_segment)]
|
||||
|
||||
def calc_reverse_offsets(self):
|
||||
# Initialize array to out of range
|
||||
r = np.zeros(len(self.source_segment), dtype=np.int32) - 1
|
||||
r[self.offset_into_source] = np.arange(len(self), dtype=np.int32)
|
||||
valid = np.where(r >= 0)[0]
|
||||
if len(valid) != len(self):
|
||||
raise errors.InvalidSegmentOrder
|
||||
return r
|
||||
|
||||
#### subset
|
||||
|
||||
def create_subset(self, new_order, *args, **kwargs):
|
||||
new_order_of_source = self.offset_into_source[new_order]
|
||||
segment = IndexedSegment(self.source_segment, new_order_of_source, *args, **kwargs)
|
||||
return segment
|
||||
|
||||
#### serialization
|
||||
|
||||
def __getstate__(self):
|
||||
"""Custom jsonpickle state save routine
|
||||
|
||||
This routine culls down the list of attributes that should be
|
||||
serialized, and in some cases changes their format slightly so they
|
||||
have a better mapping to json objects. For instance, json can't handle
|
||||
dicts with integer keys, so dicts are turned into lists of lists.
|
||||
Tuples are also turned into lists because tuples don't have a direct
|
||||
representation in json, while lists have a compact representation in
|
||||
json.
|
||||
"""
|
||||
state = dict()
|
||||
for key in self.base_serializable_attributes:
|
||||
state[key] = getattr(self, key)
|
||||
for key in self.extra_serializable_attributes:
|
||||
state[key] = getattr(self, key)
|
||||
r = self.rawdata
|
||||
state['offset_into_source'] = self.calc_serialized_offset_into_source()
|
||||
return state
|
||||
|
||||
def __setstate__(self, state):
    """Custom jsonpickle state restore routine

    The use of jsonpickle to recreate objects doesn't go through __init__,
    so there will be missing attributes when restoring old versions of the
    json. Once a version gets out in the wild and additional attributes are
    added to a segment, a default value should be applied here.

    Args:
        state: dict of saved attributes. The ``memory_map``, ``uuid`` and
            ``can_resize`` entries are popped from it, so the dict passed
            in is modified.
    """
    # The saved memory_map is converted back into a dict; an absent entry
    # yields an empty dict.
    self.memory_map = dict(state.pop('memory_map', []))
    # NOTE: uuid() is evaluated even when the saved state supplies a uuid.
    self.uuid = state.pop('uuid', uuid())
    self.can_resize = state.pop('can_resize', self.__class__.can_resize_default)
    # Defaults are installed before the saved values so that anything
    # present in the saved state overrides them.
    self.restore_missing_serializable_defaults()
    self.__dict__.update(state)
    # Runs last because it operates on attributes restored by the update.
    self.restore_renamed_serializable_attributes()
|
||||
|
||||
def restore_missing_serializable_defaults(self):
    """Hook for the future when extra serializable attributes are added to
    subclasses so new versions of the code can restore old saved files by
    providing defaults to any missing attributes.

    The base implementation does nothing.
    """
    pass
|
||||
|
||||
def restore_renamed_serializable_attributes(self):
    """Move values saved under old attribute names to their current names.

    By the time this runs, ``__setstate__`` has already copied the saved
    state into ``__dict__``, so legacy names show up as plain attributes.
    Currently the only rename handled is ``start_addr`` -> ``origin``.
    """
    if not hasattr(self, 'start_addr'):
        return
    self.origin = self.start_addr
    log.debug(f"moving start_addr to origin: {self.start_addr}")
    del self.start_addr
|
||||
|
||||
|
||||
@property
def verbose_info(self):
    """One-line human readable description of this segment.

    Uses ``verbose_name`` when it is set and falls back to ``name``.  The
    size is always included; indexed raw data additionally reports the file
    index of its first byte, and a non-empty ``error`` is appended.
    """
    label = self.verbose_name or self.name
    size = len(self)
    if self.rawdata.is_indexed:
        first = self.rawdata.order[0]
        text = "%s ($%04x bytes) non-contiguous file; file index of first byte: $%04x" % (label, size, first)
    else:
        text = "%s ($%04x bytes)" % (label, size)
    if not self.error:
        return text
    return text + " error='%s'" % self.error
|
||||
|
||||
def is_valid_index(self, i):
    """Return True when ``i`` is a non-negative index below ``len(self)``."""
    return 0 <= i < len(self)
|
||||
|
||||
def tobytes(self):
    """Return this segment's data as a ``bytes`` object.

    The values are gathered from the source segment's ``_data`` array in
    the order given by ``offset_into_source``.
    """
    return self.source_segment._data[self.offset_into_source].tobytes()
|
||||
|
||||
def get_style_bits(self, **kwargs):
    """Forward ``kwargs`` to the module-level ``get_style_bits`` helper and
    return its result; no instance state is consulted.
    """
    return get_style_bits(**kwargs)
|
||||
|
||||
def get_style_mask(self, **kwargs):
    """Forward ``kwargs`` to the module-level ``get_style_mask`` helper and
    return its result; no instance state is consulted.
    """
    return get_style_mask(**kwargs)
|
||||
|
||||
def calc_source_indexes_from_ranges(self, ranges):
    """Translate segment ranges into indexes of the source segment.

    Args:
        ranges: iterable of ``(start, end)`` pairs in this segment's own
            index space.  A pair given in reverse order is swapped before
            use; ``end`` is exclusive.

    Returns:
        Ascending array of the distinct source indexes covered by the
        ranges.
    """
    touched = np.zeros(len(self.source_segment), dtype=bool)
    offsets = self.offset_into_source
    for first, last in ranges:
        lo, hi = (last, first) if last < first else (first, last)
        touched[offsets[lo:hi]] = True
    return np.flatnonzero(touched)
|
||||
|
||||
def set_style_ranges(self, ranges, **kwargs):
    """Set style on the source segment for the given segment ranges.

    ``ranges`` are converted to source indexes, then ``kwargs`` are passed
    through to the source segment's ``set_style_at_indexes``.
    """
    indexes = self.calc_source_indexes_from_ranges(ranges)
    self.source_segment.set_style_at_indexes(indexes, **kwargs)
|
||||
|
||||
def clear_style_ranges(self, ranges, **kwargs):
    """Clear style on the source segment for the given segment ranges.

    ``ranges`` are converted to source indexes, then ``kwargs`` are passed
    through to the source segment's ``clear_style_at_indexes``.
    """
    indexes = self.calc_source_indexes_from_ranges(ranges)
    self.source_segment.clear_style_at_indexes(indexes, **kwargs)
|
||||
|
||||
def clear_style_bits(self, **kwargs):
    """Clear style on the source segment at every offset of this segment.

    ``kwargs`` are passed through to the source segment's
    ``clear_style_at_indexes``.
    """
    self.source_segment.clear_style_at_indexes(self.offset_into_source, **kwargs)
|
||||
|
||||
def get_style_ranges(self, **kwargs):
    """Return a list of (start, end) pairs whose style matches.

    ``kwargs`` select the style bits (via ``self.get_style_bits``); a
    position matches only when all of those bits are set in its style.
    """
    wanted = self.get_style_bits(**kwargs)
    return self.bool_to_ranges((self.style & wanted) == wanted)
|
||||
|
||||
def get_comment_locations(self, **kwargs):
    """Return a style array filtered to the requested bits plus comments.

    The work is done on a copy of ``rawdata``: its base style array is
    first reduced to the bits selected by ``kwargs``, then the comment bit
    is switched on at every index that has an entry in the comment table of
    ``self.rawdata``.  The copy's ``unindexed_style`` is returned.
    """
    wanted = self.get_style_bits(**kwargs)
    scratch = self.rawdata.copy()
    scratch.style_base[:] &= wanted
    commented = np.asarray(list(self.rawdata.extra.comments), dtype=np.uint32)
    scratch.style_base[commented] |= comment_bit_mask
    return scratch.unindexed_style[:]
|
||||
|
||||
def bool_to_ranges(self, matches):
    """Convert a boolean mask into a list of half-open index ranges.

    Args:
        matches: one-dimensional boolean array (or sequence) in which True
            marks a selected position.

    Returns:
        List of ``(start, end)`` tuples of Python ints, one per run of
        consecutive True positions, with ``end`` exclusive.  The list is
        empty when nothing is selected.
    """
    # Elementwise comparison against True is kept on purpose so that only
    # exact True/1 entries count, as before.
    hits = np.flatnonzero(np.asarray(matches) == True)  # noqa: E712
    # Split wherever two neighbouring hit positions are not adjacent.
    runs = np.split(hits, np.flatnonzero(np.diff(hits) != 1) + 1)
    # len() replaces np.alen(), which is deprecated and has been removed
    # from current NumPy releases.  An all-False mask yields one empty run.
    return [(int(run[0]), int(run[-1]) + 1) for run in runs if len(run) > 0]
|
||||
|
||||
def find_next(self, index, **kwargs):
    """Return the start of the next styled range after ``index``.

    ``kwargs`` select the style, as for ``get_style_ranges``.  The search
    wraps around to the first range when nothing starts after ``index``.
    Returns None when no range matches the style at all.
    """
    ranges = self.get_style_ranges(**kwargs)
    if len(ranges) == 0:
        return None
    pos = bisect.bisect_right(ranges, (index + 1, 0))
    if pos >= len(ranges):
        # Ran off the end: wrap to the first range.
        pos = 0
    return ranges[pos][0]
|
||||
|
||||
def find_previous(self, index, **kwargs):
    """Return the start of the styled range preceding ``index``.

    ``kwargs`` select the style, as for ``get_style_ranges``.  The search
    wraps around to the last range when nothing precedes ``index``.
    Returns None when no range matches the style at all.
    """
    ranges = self.get_style_ranges(**kwargs)
    if len(ranges) == 0:
        return None
    # bisect_left yields 0..len(ranges); after subtracting one, the only
    # possible negative value is -1, which already addresses the last range
    # and so provides the wrap-around.
    pos = bisect.bisect_left(ranges, (index - 1, 0)) - 1
    return ranges[pos][0]
|
||||
|
||||
def set_user_data(self, ranges, user_index, user_data):
    """Store ``user_data`` for every index covered by ``ranges``.

    Each segment index in each half-open ``(start, end)`` range is
    converted with ``get_raw_index`` and the value is written into the
    ``user_index`` table of the raw data's extra storage.
    """
    for first, last in ranges:
        # FIXME: this is slow
        for raw in map(self.get_raw_index, range(first, last)):
            self.rawdata.extra.user_data[user_index][raw] = user_data
|
||||
|
||||
def get_user_data(self, index, user_index):
    """Return the user data stored for ``index`` in table ``user_index``.

    ``index`` is converted with ``get_raw_index`` before the lookup.  A
    missing table or a missing entry yields 0.
    """
    raw = self.get_raw_index(index)
    try:
        value = self.rawdata.extra.user_data[user_index][raw]
    except KeyError:
        value = 0
    return value
|
||||
|
||||
def get_sorted_user_data(self, user_index):
    """Summarize one user-data table as runs of equal values.

    Returns:
        List of ``[[start, end], value]`` entries in ascending index order.
        Each entry covers a run of consecutive indexes (``end`` exclusive)
        that all carry ``value``.  An empty table gives an empty list.
    """
    table = self.rawdata.extra.user_data[user_index]
    runs = []
    for raw in sorted(table.keys()):
        value = table[raw]
        if runs:
            span, current = runs[-1]
            if not (value != current or raw != span[1]):
                # Same value and directly adjacent: grow the current run.
                span[1] = raw + 1
                continue
        runs.append([[raw, raw + 1], value])
    return runs
|
||||
|
||||
def remove_comments_at_indexes(self, indexes):
    """Call ``remove_comment`` for every segment index in ``indexes``."""
    for where_index in indexes:
        self.remove_comment(where_index)
|
||||
|
||||
def set_comments_at_indexes(self, ranges, indexes, comments):
    """Store or delete comments at the given segment indexes.

    ``indexes`` and ``comments`` are walked in lockstep.  A truthy comment
    is stored under the raw index of its segment index; a falsy one removes
    whatever comment is currently stored there.  ``ranges`` is accepted but
    not used by this method.
    """
    for seg_index, text in zip(indexes, comments):
        rawindex = self.get_raw_index(seg_index)
        if text:
            log.debug(" restoring comment: rawindex=%d, '%s'" % (rawindex, text))
            self.rawdata.extra.comments[rawindex] = text
            continue
        if rawindex in self.rawdata.extra.comments:
            del self.rawdata.extra.comments[rawindex]
            log.debug(" no comment in original data, removed comment in current data at rawindex=%d" % rawindex)
        else:
            log.debug(" no comment in original data or current data at rawindex=%d" % rawindex)
|
||||
|
||||
def get_comments_at_indexes(self, indexes):
    """Look up the comments attached to a set of segment indexes.

    Returns:
        ``(positions, comments)``: ``positions`` are the positions within
        ``indexes`` whose style has the comment bit set, and ``comments``
        holds the matching text in the same order (None when the bit is set
        but no text is stored).
    """
    flagged = np.where(self.style[indexes] & comment_bit_mask > 0)[0]
    found = [
        self.rawdata.extra.comments.get(self.get_raw_index(indexes[pos]))
        for pos in flagged
    ]
    return flagged, found
|
||||
|
||||
def get_comment_restore_data(self, ranges):
    """Snapshot styles and comments so they can be put back on an undo.

    Returns:
        List with one ``(start, end, styles, items)`` tuple per range.
        ``styles`` is a copy of ``self.style[start:end]`` and ``items``
        maps every segment index in the range to ``(rawindex, comment)``,
        where ``comment`` is None if nothing is stored at that raw index.
        Callers should treat the structure as opaque and hand it back to
        ``restore_comments``.
    """
    snapshot = []
    for start, end in ranges:
        log.debug("range: %d-%d" % (start, end))
        saved_styles = self.style[start:end].copy()
        per_index = {}
        for i in range(start, end):
            rawindex = self.get_raw_index(i)
            if rawindex in self.rawdata.extra.comments:
                comment = self.rawdata.extra.comments[rawindex]
                log.debug(" index: %d rawindex=%d '%s'" % (i, rawindex, comment))
            else:
                comment = None
                log.debug(" index: %d rawindex=%d NO COMMENT TO SAVE" % (i, rawindex))
            per_index[i] = (rawindex, comment)
        snapshot.append((start, end, saved_styles, per_index))
    return snapshot
|
||||
|
||||
def restore_comments(self, restore_data):
    """Restore comment styles and data

    ``restore_data`` is the structure produced by
    ``get_comment_restore_data``.  For each saved range the style slice is
    written back, then every saved comment is stored again; where no
    comment was saved, any comment present now is removed.
    """
    for start, end, saved_styles, per_index in restore_data:
        log.debug("range: %d-%d" % (start, end))
        self.style[start:end] = saved_styles
        for i in range(start, end):
            rawindex, comment = per_index[i]
            if comment:
                log.debug(" restoring comment: rawindex=%d, '%s'" % (rawindex, comment))
                self.rawdata.extra.comments[rawindex] = comment
                continue
            # no comment in original data, remove any if exists
            if rawindex in self.rawdata.extra.comments:
                del self.rawdata.extra.comments[rawindex]
                log.debug(" no comment in original data, removed comment in current data at rawindex=%d" % rawindex)
            else:
                log.debug(" no comment in original data or current data at rawindex=%d" % rawindex)
|
||||
|
||||
def get_comments_in_range(self, start, end):
    """Collect the comments whose segment index lies in ``[start, end)``.

    Returns:
        dict mapping segment index to comment text.  Comments whose raw
        index cannot be translated (``get_index_from_base_index`` raises
        IndexError) are skipped.
    """
    found = {}
    # Naive way, but maybe it's fast enough: loop over all comments
    # gathering those within the bounds
    for rawindex, text in self.rawdata.extra.comments.items():
        try:
            seg_index = self.get_index_from_base_index(rawindex)
        except IndexError:
            continue
        if start <= seg_index < end:
            found[seg_index] = text
    return found
|
||||
|
||||
def set_comment_at(self, index, text):
    """Store ``text`` as the comment for segment index ``index``.

    The comment is filed under the raw index returned by
    ``get_raw_index``; style bits are not touched here.
    """
    rawindex = self.get_raw_index(index)
    self.rawdata.extra.comments[rawindex] = text
|
||||
|
||||
def set_comment(self, ranges, text):
    """Attach ``text`` as a comment to each of ``ranges``.

    The comment style is set over every range, while the text itself is
    stored only at the first index of each range.
    """
    self.set_style_ranges(ranges, comment=True)
    # Only ``start`` is used; ``end`` matters for the style call above only.
    for start, end in ranges:
        self.set_comment_at(start, text)
|
||||
|
||||
def get_comment(self, index):
    """Return the comment stored for segment index ``index``, or an empty
    string when there is none.
    """
    rawindex = self.get_raw_index(index)
    return self.rawdata.extra.comments.get(rawindex, "")
|
||||
|
||||
def remove_comment(self, index):
    """Delete the comment stored for segment index ``index``, if any.

    Nothing happens when no comment is stored there.
    """
    self.rawdata.extra.comments.pop(self.get_raw_index(index), None)
|
||||
|
||||
def get_first_comment(self, ranges):
    """Return the comment stored at the lowest start index of ``ranges``.

    Args:
        ranges: iterable of ``(start, end)`` pairs; only the start values
            are examined.

    Returns:
        The comment text at the smallest start index, or an empty string
        when nothing is stored there or when ``ranges`` is empty.
    """
    # The min() builtin replaces reduce(min, [...]); the default keeps an
    # empty ``ranges`` from raising and reports "no comment" instead.
    first = min((r[0] for r in ranges), default=None)
    if first is None:
        return ""
    return self.rawdata.extra.comments.get(self.get_raw_index(first), "")
|
||||
|
||||
def clear_comment(self, ranges):
    """Remove the comment style and all comment text inside ``ranges``.

    The comment style is cleared over the ranges first; afterwards every
    segment index in each half-open ``(start, end)`` range has its stored
    comment, if any, deleted.
    """
    self.clear_style_ranges(ranges, comment=True)
    for first, last in ranges:
        for seg_index in range(first, last):
            self.rawdata.extra.comments.pop(self.get_raw_index(seg_index), None)
|
||||
|
||||
def get_sorted_comments(self):
    """Return all stored comments as a sorted list of ``[index, text]``
    two-element lists.
    """
    pairs = [[raw, text] for raw, text in self.rawdata.extra.comments.items()]
    pairs.sort()
    return pairs
|
||||
|
||||
def iter_comments_in_segment(self):
    """Yield ``(segment_index, comment)`` for comments inside this segment.

    A comment is reported when its raw index lies between the raw index of
    this segment's first element (inclusive) and the raw index computed for
    ``len(self.rawdata)`` (exclusive).  The yielded index comes from the raw
    data's ``get_reverse_index``.
    """
    # The unused ``start = self.origin`` local was removed; it had no effect
    # on the result.
    first_raw = self.get_raw_index(0)
    end_raw = self.get_raw_index(len(self.rawdata))
    for raw, text in self.rawdata.extra.comments.items():
        if first_raw <= raw < end_raw:
            yield self.rawdata.get_reverse_index(raw), text
|
||||
|
||||
def copy_user_data(self, source, index_offset=0):
    """Copy comments and other user data from the source segment to this
    segment.

    The index offset is the offset into self based on the index of source.

    NOTE(review): as written, only comments are copied; no other kind of
    user data is transferred by this method.
    """
    for index, comment in source.iter_comments_in_segment():
        self.set_comment_at(index + index_offset, comment)
|
||||
|
||||
def label(self, index, lower_case=True):
    """Format the address of ``index`` as hex of at least four digits.

    The address is ``index + self.origin``; ``lower_case`` selects between
    lowercase and uppercase hex digits.
    """
    template = "%04x" if lower_case else "%04X"
    return template % (index + self.origin)
|
||||
|
||||
@property
def search_copy(self):
    """Bytes copy of ``self.data``, created on first use and then cached.

    The cache lives in ``_search_copy``; while that attribute is None the
    next access rebuilds the copy with ``self.data.tobytes()``.
    """
    cached = self._search_copy
    if cached is None:
        cached = self._search_copy = self.data.tobytes()
    return cached
|
||||
|
||||
def compare_segment(self, other_segment):
    """Flag every position whose data differs from ``other_segment``.

    Existing diff style is cleared first; afterwards the diff bit is OR-ed
    into ``self.style`` wherever the raw data of the two segments differ.
    """
    self.clear_style_bits(diff=True)
    diff = self.rawdata.data != other_segment.rawdata.data
    # The flag array is built once; previously it was computed twice and
    # the first copy was thrown away.
    self.style |= diff * np.uint8(diff_bit_mask)
    log.debug("compare_segment: # entries %d, # diffs: %d" % (len(diff), np.count_nonzero(diff)))
|
@ -1,8 +1,6 @@
|
||||
import hashlib
|
||||
|
||||
import numpy as np
|
||||
|
||||
from .segments import SegmentData, DefaultSegment
|
||||
from .segments import DefaultSegment
|
||||
from .kboot import KBootImage
|
||||
from .ataridos import AtariDosDiskImage, BootDiskImage, AtariDosFile, XexContainerSegment, AtariDiskImage
|
||||
from .spartados import SpartaDosDiskImage
|
||||
@ -13,7 +11,6 @@ from .standard_delivery import StandardDeliveryImage
|
||||
from . import errors
|
||||
from .magic import guess_detail_for_mime
|
||||
from . import container
|
||||
from .dcm import DCMContainer
|
||||
from .signatures import sha1_signatures
|
||||
|
||||
import logging
|
||||
@ -25,11 +22,11 @@ class SegmentParser:
|
||||
image_type = None
|
||||
container_segment = DefaultSegment
|
||||
|
||||
def __init__(self, segment_data, strict=False):
|
||||
def __init__(self, container, strict=False):
|
||||
self.image = None
|
||||
self.segments = []
|
||||
self.strict = strict
|
||||
self.segment_data = segment_data
|
||||
self.container = container
|
||||
self.parse()
|
||||
|
||||
def __str__(self):
|
||||
@ -68,8 +65,7 @@ class SegmentParser:
|
||||
self.__dict__.update(state)
|
||||
|
||||
def parse(self):
|
||||
r = self.segment_data
|
||||
self.segments.append(self.container_segment(r, 0, name=self.menu_name))
|
||||
r = self.container
|
||||
try:
|
||||
log.debug("Trying %s" % self.image_type)
|
||||
log.debug(self.image_type.__mro__)
|
||||
@ -84,7 +80,7 @@ class SegmentParser:
|
||||
|
||||
def reconstruct_segments(self, new_rawdata):
|
||||
self.image = self.get_image(new_rawdata)
|
||||
self.segment_data = new_rawdata
|
||||
self.container = new_rawdata
|
||||
for s in self.segments:
|
||||
s.reconstruct_raw(new_rawdata)
|
||||
|
||||
@ -106,7 +102,7 @@ class DefaultSegmentParser(SegmentParser):
|
||||
menu_name = "Raw Data"
|
||||
|
||||
def parse(self):
|
||||
self.segments = [DefaultSegment(self.segment_data, 0)]
|
||||
self.segments = [DefaultSegment(self.container, 0)]
|
||||
|
||||
|
||||
class KBootSegmentParser(SegmentParser):
|
||||
@ -201,36 +197,12 @@ class ProdosSegmentParser(SegmentParser):
|
||||
image_type = ProdosDiskImage
|
||||
|
||||
|
||||
known_containers = [
|
||||
container.GZipContainer,
|
||||
container.BZipContainer,
|
||||
container.LZMAContainer,
|
||||
DCMContainer,
|
||||
]
|
||||
|
||||
|
||||
def guess_container(r, verbose=False):
|
||||
for c in known_containers:
|
||||
if verbose:
|
||||
log.info(f"trying container {c}")
|
||||
try:
|
||||
found = c(r)
|
||||
except errors.InvalidContainer as e:
|
||||
continue
|
||||
else:
|
||||
if verbose:
|
||||
log.info(f"found container {c}")
|
||||
return found
|
||||
log.info(f"image does not appear to be a container.")
|
||||
return None
|
||||
|
||||
|
||||
def guess_parser_by_size(r, verbose=False):
|
||||
def guess_parser_by_size(container, verbose=False):
|
||||
found = None
|
||||
mime = None
|
||||
size = len(r)
|
||||
size = len(container)
|
||||
if size in sha1_signatures:
|
||||
sha_hash = hashlib.sha1(r.data).digest()
|
||||
sha_hash = container.sha1
|
||||
log.info(f"{size} in signature database, attempting to match {sha_hash}")
|
||||
try:
|
||||
match = sha1_signatures[size][sha_hash]
|
||||
@ -242,7 +214,7 @@ def guess_parser_by_size(r, verbose=False):
|
||||
parsers = mime_parsers[mime]
|
||||
for parser in parsers:
|
||||
try:
|
||||
found = parser(r, False)
|
||||
found = parser(container, False)
|
||||
break
|
||||
except errors.InvalidSegmentParser as e:
|
||||
if verbose:
|
||||
@ -254,12 +226,12 @@ def guess_parser_by_size(r, verbose=False):
|
||||
log.info(f"{size} not found in signature database; skipping sha1 matching")
|
||||
return mime, found
|
||||
|
||||
def guess_parser_for_mime(mime, r, verbose=False):
|
||||
def guess_parser_for_mime(mime, container, verbose=False):
|
||||
parsers = mime_parsers[mime]
|
||||
found = None
|
||||
for parser in parsers:
|
||||
try:
|
||||
found = parser(r, True)
|
||||
found = parser(container, True)
|
||||
break
|
||||
except errors.InvalidSegmentParser as e:
|
||||
if verbose:
|
||||
@ -268,27 +240,24 @@ def guess_parser_for_mime(mime, r, verbose=False):
|
||||
return found
|
||||
|
||||
|
||||
def guess_parser_for_system(mime_base, r):
|
||||
def guess_parser_for_system(mime_base, container):
|
||||
for mime in mime_parse_order:
|
||||
if mime.startswith(mime_base):
|
||||
p = guess_parser_for_mime(mime, r)
|
||||
p = guess_parser_for_mime(mime, container)
|
||||
if p is not None:
|
||||
mime = guess_detail_for_mime(mime, r, p)
|
||||
mime = guess_detail_for_mime(mime, container, p)
|
||||
return mime, p
|
||||
return None, None
|
||||
|
||||
|
||||
def iter_parsers(r):
|
||||
container = guess_container(r.data)
|
||||
if container is not None:
|
||||
r = SegmentData(container.unpacked)
|
||||
mime, parser = guess_parser_by_size(r)
|
||||
def iter_parsers(container):
|
||||
mime, parser = guess_parser_by_size(container)
|
||||
if parser is None:
|
||||
for mime in mime_parse_order:
|
||||
p = guess_parser_for_mime(mime, r)
|
||||
p = guess_parser_for_mime(mime, container)
|
||||
if p is not None:
|
||||
parser = p
|
||||
mime = guess_detail_for_mime(mime, r, p)
|
||||
mime = guess_detail_for_mime(mime, container, p)
|
||||
break
|
||||
return mime, parser
|
||||
|
@ -5,58 +5,13 @@ import numpy as np
|
||||
|
||||
from . import errors
|
||||
from .utils import to_numpy, to_numpy_list, uuid
|
||||
from . import style_bits
|
||||
from functools import reduce
|
||||
|
||||
user_bit_mask = 0x07
|
||||
data_style = 0x1
|
||||
not_user_bit_mask = 0xff ^ user_bit_mask
|
||||
diff_bit_mask = 0x10
|
||||
match_bit_mask = 0x20
|
||||
comment_bit_mask = 0x40
|
||||
selected_bit_mask = 0x80
|
||||
|
||||
import logging
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def get_style_bits(match=False, comment=False, selected=False, data=False, diff=False, user=0):
|
||||
""" Return an int value that contains the specified style bits set.
|
||||
|
||||
Available styles for each byte are:
|
||||
|
||||
match: part of the currently matched search
|
||||
comment: user commented area
|
||||
selected: selected region
|
||||
data: labeled in the disassembler as a data region (i.e. not disassembled)
|
||||
"""
|
||||
style_bits = 0
|
||||
if user:
|
||||
style_bits |= (user & user_bit_mask)
|
||||
if diff:
|
||||
style_bits |= diff_bit_mask
|
||||
if match:
|
||||
style_bits |= match_bit_mask
|
||||
if comment:
|
||||
style_bits |= comment_bit_mask
|
||||
if data:
|
||||
style_bits |= (data_style & user_bit_mask)
|
||||
if selected:
|
||||
style_bits |= selected_bit_mask
|
||||
return style_bits
|
||||
|
||||
|
||||
def get_style_mask(**kwargs):
|
||||
"""Get the bit mask that, when anded with data, will turn off the
|
||||
selected bits
|
||||
"""
|
||||
bits = get_style_bits(**kwargs)
|
||||
if 'user' in kwargs and kwargs['user']:
|
||||
bits |= user_bit_mask
|
||||
else:
|
||||
bits &= (0xff ^ user_bit_mask)
|
||||
return 0xff ^ bits
|
||||
|
||||
|
||||
class SegmentSaver:
|
||||
export_data_name = "Raw Data"
|
||||
export_extensions = [".dat"]
|
||||
@ -80,7 +35,7 @@ class BSAVESaver:
|
||||
return header.tobytes() + segment.tobytes()
|
||||
|
||||
|
||||
class OrderWrapper:
|
||||
class ArrayWrapper:
|
||||
"""Wrapper for numpy data so that manipulations can use normal numpy syntax
|
||||
and still affect the data according to the byte ordering.
|
||||
|
||||
@ -89,13 +44,12 @@ class OrderWrapper:
|
||||
explicitly references the byte ordering in the data array.
|
||||
"""
|
||||
|
||||
def __init__(self, data, byte_order):
|
||||
def __init__(self, data, order):
|
||||
self.np_data = data
|
||||
self.base = data.base # base array for numpy bounds determination
|
||||
self.order = byte_order
|
||||
self.order = order
|
||||
|
||||
def __str__(self):
|
||||
return f"OrderWrapper at {hex(id(self))} count={len(self)} order={self.order} base: count={len(self.np_data)}"
|
||||
return f"ArrayWrapper at {hex(id(self))} count={len(self)} order={self.order}"
|
||||
|
||||
def __len__(self):
|
||||
return np.alen(self.order)
|
||||
@ -113,361 +67,111 @@ class OrderWrapper:
|
||||
def __setitem__(self, index, value):
|
||||
self.np_data[self.order[index]] = value
|
||||
|
||||
def sub_index(self, index):
|
||||
"""Return index of index so it can be used directly in a new
|
||||
SegmentData object, rather than propagating multiple index lookups by
|
||||
contructing a new OrderWrapper that calls parent OrderWrapper objects.
|
||||
"""
|
||||
return self.order[index]
|
||||
|
||||
@property
|
||||
def shape(self):
|
||||
return (len(self),)
|
||||
|
||||
@property
|
||||
def unindexed(self):
|
||||
return self.np_data[self.order]
|
||||
|
||||
def tobytes(self):
|
||||
return self.np_data[self.order].tobytes()
|
||||
|
||||
|
||||
class UserExtraData:
|
||||
def __init__(self):
|
||||
self.comments = dict()
|
||||
self.user_data = dict()
|
||||
for i in range(1, user_bit_mask):
|
||||
self.user_data[i] = dict()
|
||||
|
||||
|
||||
class SegmentData:
|
||||
def __init__(self, data, style=None, extra=None, debug=False, order=None):
|
||||
"""Storage for raw data
|
||||
|
||||
order is a list into the base array's data; each item in the list is an
|
||||
index of the base array. E.g. if the base array is the 20 element list
|
||||
containing the data [100, 101, ... 119] and the order is [10, 0, 5, 2],
|
||||
the segment data used is [110, 100, 105, 102]
|
||||
"""
|
||||
self.order = order
|
||||
self.is_indexed = order is not None
|
||||
if self.is_indexed:
|
||||
self.data = OrderWrapper(data, order)
|
||||
else:
|
||||
self.data = to_numpy(data)
|
||||
self.calc_lookups()
|
||||
if style is None:
|
||||
if debug:
|
||||
self.style = np.arange(len(self), dtype=np.uint8)
|
||||
else:
|
||||
self.style = np.zeros(len(self), dtype=np.uint8)
|
||||
else:
|
||||
if self.is_indexed:
|
||||
self.style = OrderWrapper(style, order)
|
||||
else:
|
||||
self.style = style
|
||||
if extra is None:
|
||||
extra = UserExtraData()
|
||||
self.extra = extra
|
||||
|
||||
def __str__(self):
|
||||
return "SegmentData id=%x indexed=%s data=%s len=%s" % (id(self), self.is_indexed, type(self.data), len(self.data))
|
||||
|
||||
def __len__(self):
|
||||
return self.data_length
|
||||
|
||||
def resize(self, newsize):
|
||||
if self.data.base is None:
|
||||
try:
|
||||
newdata = np.resize(self.data, (newsize,))
|
||||
newstyle = np.resize(self.style, (newsize,))
|
||||
except:
|
||||
raise
|
||||
else:
|
||||
self.data = newdata
|
||||
self.style = newstyle
|
||||
else:
|
||||
raise ValueError("Can't resize a view of a segment")
|
||||
self.calc_lookups()
|
||||
|
||||
def replace_arrays(self, base_raw):
|
||||
newsize = len(base_raw)
|
||||
oldsize = len(self.data_base)
|
||||
if newsize < oldsize:
|
||||
raise errors.NotImplementedError("Can't truncate yet")
|
||||
if self.is_indexed:
|
||||
self.data.np_data = base_raw.data
|
||||
self.data.base = base_raw.data.base
|
||||
self.style.np_data = base_raw.style
|
||||
self.style.base = base_raw.style.base
|
||||
elif self.data.base is not None:
|
||||
# if there is no base array, we aren't looking at a slice so we
|
||||
# must be copying the entire array.
|
||||
start, end = self.byte_bounds_offset()
|
||||
self.data = base_raw.data[start:end]
|
||||
self.style = base_raw.style[start:end]
|
||||
else:
|
||||
raise ValueError("The base SegmentData object should use the resize method to replace arrays")
|
||||
self.calc_lookups()
|
||||
|
||||
def calc_lookups(self):
|
||||
if self.is_indexed:
|
||||
end = len(self.data.np_data)
|
||||
self.data_start, self.data_end = 0, end
|
||||
self.base_start, self.base_end = 0, end
|
||||
base_size = end
|
||||
elif self.data.base is None:
|
||||
end = len(self.data)
|
||||
self.data_start, self.data_end = 0, end
|
||||
self.base_start, self.base_end = 0, end
|
||||
base_size = end
|
||||
else:
|
||||
self.data_start, self.data_end = np.byte_bounds(self.data)
|
||||
self.base_start, self.base_end = np.byte_bounds(self.data.base)
|
||||
base_size = len(self.data.base)
|
||||
self.base_length = base_size
|
||||
self.data_length = len(self.data)
|
||||
# Force regeneration of reverse index mapping the next time it's needed
|
||||
self._reverse_index_mapping = None
|
||||
|
||||
@property
|
||||
def bufferedio(self):
|
||||
buf = io.BytesIO(self.data[:])
|
||||
return buf
|
||||
|
||||
@property
|
||||
def is_base(self):
|
||||
return not self.is_indexed and self.data.base is None
|
||||
|
||||
@property
|
||||
def data_base(self):
|
||||
return self.data.np_data if self.is_indexed else self.data.base if self.data.base is not None else self.data
|
||||
|
||||
@property
|
||||
def style_base(self):
|
||||
return self.style.np_data if self.is_indexed else self.style.base if self.style.base is not None else self.style
|
||||
|
||||
def get_data(self):
|
||||
return self.data
|
||||
|
||||
def get_style(self):
|
||||
return self.style
|
||||
|
||||
@property
|
||||
def unindexed_data(self):
|
||||
if self.is_indexed:
|
||||
return self.data.unindexed
|
||||
return self.data
|
||||
|
||||
@property
|
||||
def unindexed_style(self):
|
||||
if self.is_indexed:
|
||||
return self.style.unindexed
|
||||
return self.style
|
||||
|
||||
def byte_bounds_offset(self):
|
||||
"""Return start and end offsets of this segment's data into the
|
||||
base array's data.
|
||||
|
||||
This ignores the byte order index. Arrays using the byte order index
|
||||
will have the entire base array's raw data.
|
||||
"""
|
||||
if self.data.base is None:
|
||||
if self.is_indexed:
|
||||
basearray = self.data.np_data
|
||||
else:
|
||||
basearray = self.data
|
||||
return 0, len(basearray)
|
||||
return int(self.data_start - self.base_start), int(self.data_end - self.base_start)
|
||||
|
||||
def get_raw_index(self, i):
|
||||
"""Get index into base array's raw data, given the index into this
|
||||
segment
|
||||
"""
|
||||
if self.is_indexed:
|
||||
return int(self.order[i])
|
||||
if self.data.base is None:
|
||||
return int(i)
|
||||
return int(self.data_start - self.base_start + i)
|
||||
|
||||
def get_indexes_from_base(self):
|
||||
"""Get array of indexes from the base array, as if this raw data were
|
||||
indexed.
|
||||
"""
|
||||
if self.is_indexed:
|
||||
return np.copy(self.order[i])
|
||||
if self.data.base is None:
|
||||
i = 0
|
||||
else:
|
||||
i = self.get_raw_index(0)
|
||||
return np.arange(i, i + len(self), dtype=np.uint32)
|
||||
|
||||
def __getitem__(self, index):
|
||||
if self.is_indexed:
|
||||
order = self.data.sub_index(index)
|
||||
d = self.data.np_data
|
||||
s = self.style.np_data
|
||||
else:
|
||||
order = None
|
||||
d = self.data[index]
|
||||
s = self.style[index]
|
||||
e = self.extra
|
||||
return SegmentData(d, s, e, order=order)
|
||||
|
||||
def copy(self):
|
||||
if self.is_indexed:
|
||||
d = self.data.np_data.copy()
|
||||
s = self.style.np_data.copy()
|
||||
copy = SegmentData(d, s, order=self.order)
|
||||
elif self.data.base is None:
|
||||
# if there is no base array, we aren't looking at a slice so we
|
||||
# must be copying the entire array.
|
||||
d = self.data.copy()
|
||||
s = self.style.copy()
|
||||
copy = SegmentData(d, s)
|
||||
else:
|
||||
d = self.data.base.copy()
|
||||
s = self.style.base.copy()
|
||||
start, end = self.byte_bounds_offset()
|
||||
copy = SegmentData(d[start:end], s[start:end])
|
||||
return copy
|
||||
|
||||
def get_bases(self):
|
||||
if self.data.base is None:
|
||||
data_base = self.data
|
||||
style_base = self.style
|
||||
else:
|
||||
data_base = self.data.base
|
||||
style_base = self.style.base
|
||||
return data_base, style_base
|
||||
|
||||
def get_indexed(self, index):
|
||||
index = to_numpy_list(index)
|
||||
if self.is_indexed:
|
||||
return self[index]
|
||||
|
||||
# check to make sure all indexes are valid, raises IndexError if not
|
||||
check = self.data[index]
|
||||
|
||||
# index needs to be relative to the base array
|
||||
base_index = index + self.get_raw_index(0)
|
||||
data_base, style_base = self.get_bases()
|
||||
return SegmentData(data_base, style_base, self.extra, order=base_index)
|
||||
|
||||
@property
|
||||
def reverse_index_mapping(self):
|
||||
"""Get mapping from this segment's indexes to the indexes of
|
||||
the base array.
|
||||
|
||||
If the index is < 0, the index is out of range, meaning that it doesn't
|
||||
exist in this segment and is not mapped to the base array
|
||||
"""
|
||||
if self._reverse_index_mapping is None:
|
||||
if self.is_indexed:
|
||||
# Initialize array to out of range
|
||||
r = np.zeros(self.base_length, dtype=np.int32) - 1
|
||||
r[self.order] = np.arange(len(self.order), dtype=np.int32)
|
||||
elif self.data.base is None:
|
||||
# Starts at the beginning; produces the identity
|
||||
r = np.arange(self.data_length, dtype=np.int32)
|
||||
else:
|
||||
r = np.zeros(self.base_length, dtype=np.int32) - 1
|
||||
r[self.data_start - self.base_start:self.data_end - self.base_start] = np.arange(self.data_length, dtype=np.int32)
|
||||
self._reverse_index_mapping = r
|
||||
return self._reverse_index_mapping
|
||||
|
||||
def get_reverse_index(self, base_index):
|
||||
"""Get index into this segment's data given the index into the base data
|
||||
|
||||
Raises IndexError if the base index doesn't map to anything in this
|
||||
segment's data
|
||||
"""
|
||||
r = self.reverse_index_mapping[base_index]
|
||||
if r < 0:
|
||||
raise IndexError("index %d not mapped in this segment" % base_index)
|
||||
return r
|
||||
# Dummy to prevent import statement failures
|
||||
pass
|
||||
|
||||
|
||||
class DefaultSegment:
|
||||
savers = [SegmentSaver, BSAVESaver]
|
||||
can_resize_default = False
|
||||
|
||||
base_serializable_attributes = ['origin', 'error', 'name', 'verbose_name', 'page_size', 'map_width', 'uuid', 'can_resize']
|
||||
base_serializable_attributes = ['origin', 'error', 'name', 'verbose_name', 'uuid', 'can_resize']
|
||||
extra_serializable_attributes = []
|
||||
|
||||
def __init__(self, rawdata, origin=0, name="All", error=None, verbose_name=None, memory_map=None):
|
||||
def __init__(self, container, offset_or_offset_list, origin=0, name="All", error=None, verbose_name=None, length=None):
|
||||
self.container = container
|
||||
try:
|
||||
start_offset = int(offset_or_offset_list)
|
||||
except TypeError:
|
||||
self.set_offset_from_list(offset_or_offset_list)
|
||||
else:
|
||||
self.set_offset_from_ints(offset_or_offset_list, length)
|
||||
self.verify_offsets()
|
||||
|
||||
self.origin = int(origin) # force python int to decouple from possibly being a numpy datatype
|
||||
self.set_raw(rawdata)
|
||||
self.error = error
|
||||
self.name = name
|
||||
self.verbose_name = verbose_name
|
||||
self.page_size = -1
|
||||
self.map_width = 40
|
||||
self.uuid = uuid()
|
||||
if memory_map is None:
|
||||
memory_map = {}
|
||||
self.memory_map = memory_map
|
||||
|
||||
# Some segments may be resized to contain additional segments not
|
||||
# present when the segment was created.
|
||||
self.can_resize = self.__class__.can_resize_default
|
||||
|
||||
def set_raw(self, rawdata):
|
||||
if type(rawdata) != SegmentData:
|
||||
log.warning(f"data not in SegmentData format: {type(rawdata)}")
|
||||
rawdata = SegmentData(rawdata)
|
||||
self.rawdata = rawdata
|
||||
self.update_raw_pointers()
|
||||
#### properties
|
||||
|
||||
def get_raw(self):
|
||||
return self.rawdata
|
||||
@property
|
||||
def data(self):
|
||||
return ArrayWrapper(self.container._data, self.container_offset)
|
||||
|
||||
def update_raw_pointers(self):
|
||||
self.data = self.rawdata.get_data()
|
||||
self.style = self.rawdata.get_style()
|
||||
self._search_copy = None
|
||||
@property
|
||||
def style(self):
|
||||
return ArrayWrapper(self.container._style, self.container_offset)
|
||||
|
||||
def resize(self, newsize, zeros=True):
|
||||
""" Resize the data arrays.
|
||||
def __len__(self):
|
||||
return np.alen(self.container_offset)
|
||||
|
||||
This can only be performed on the container segment. Child segments
|
||||
must adjust their rawdata to point to the correct place.
|
||||
#### dunder methods and convenience functions to operate on data (not style)
|
||||
|
||||
Since segments don't keep references to other segments, it is the
|
||||
user's responsibility to update any child segments that point to this
|
||||
segment's data.
|
||||
def __str__(self):
|
||||
if self.origin > 0:
|
||||
origin = " @ %04x" % (self.origin)
|
||||
else:
|
||||
origin = ""
|
||||
s = "%s ($%x bytes%s)" % (self.name, len(self), origin)
|
||||
if self.error:
|
||||
s += " " + self.error
|
||||
return s
|
||||
|
||||
Numpy can't do an in-place resize on an array that has a view, so the
|
||||
data must be replaced and all segments that point to that raw data must
|
||||
also be changed. This has to happen outside this method because it
|
||||
doesn't know the segment list of segments using itself as a base.
|
||||
"""
|
||||
if not self.can_resize:
|
||||
raise ValueError("Segment %s can't be resized" % str(self))
|
||||
# only makes sense for the container (outermost) object
|
||||
if not self.rawdata.is_base:
|
||||
raise ValueError("Only container segments can be resized")
|
||||
origsize = len(self)
|
||||
self.rawdata.resize(newsize)
|
||||
self.set_raw(self.rawdata) # force attributes to be reset
|
||||
newsize = len(self)
|
||||
if zeros:
|
||||
if newsize > origsize:
|
||||
self.data[origsize:] = 0
|
||||
self.style[origsize:] = 0
|
||||
return origsize, newsize
|
||||
def __and__(self, other):
|
||||
return self.container._data[self.container_offset] & other
|
||||
|
||||
def replace_data(self, container):
|
||||
self.rawdata.replace_arrays(container.rawdata)
|
||||
self.update_raw_pointers()
|
||||
def __iand__(self, other):
|
||||
self.container._data[self.container_offset] &= other
|
||||
return self
|
||||
|
||||
def create_subset(self, new_order, name, verbose_name=""):
|
||||
raw = self.rawdata.get_indexed(new_order)
|
||||
if not verbose_name:
|
||||
verbose_name = name
|
||||
segment = DefaultSegment(raw, name=name, verbose_name=verbose_name)
|
||||
def __getitem__(self, index):
|
||||
return self.container._data[self.container_offset[index]]
|
||||
|
||||
def __setitem__(self, index, value):
|
||||
self.container._data[self.container_offset[index]] = value
|
||||
|
||||
#### offsets
|
||||
|
||||
def set_offset_from_list(self, offsets):
|
||||
self.container_offset = to_numpy_list(offsets)
|
||||
|
||||
def set_offset_from_ints(self, start, length):
|
||||
if length is None:
|
||||
raise errors.InvalidSegmentLength
|
||||
self.container_offset = np.arange(start, start + length, dtype=np.uint32)
|
||||
|
||||
def verify_offsets(self):
|
||||
self.enforce_offset_bounds()
|
||||
self.reverse_offset = self.calc_reverse_offsets()
|
||||
|
||||
def enforce_offset_bounds(self):
|
||||
self.container_offset = self.container_offset[self.container_offset < len(self.container)]
|
||||
|
||||
def calc_reverse_offsets(self):
|
||||
# Initialize array to out of range
|
||||
r = np.zeros(len(self.container), dtype=np.int32) - 1
|
||||
r[self.container_offset] = np.arange(len(self), dtype=np.int32)
|
||||
valid = np.where(r >= 0)[0]
|
||||
if len(valid) != len(self):
|
||||
raise errors.InvalidSegmentOrder
|
||||
return r
|
||||
|
||||
#### subset
|
||||
|
||||
def create_subset(self, new_order, *args, **kwargs):
|
||||
new_order_of_source = self.container_offset[new_order]
|
||||
segment = DefaultSegment(self.container, new_order_of_source, *args, **kwargs)
|
||||
return segment
|
||||
|
||||
#### serialization
|
||||
|
||||
def __getstate__(self):
|
||||
"""Custom jsonpickle state save routine
|
||||
|
||||
@ -485,12 +189,7 @@ class DefaultSegment:
|
||||
for key in self.extra_serializable_attributes:
|
||||
state[key] = getattr(self, key)
|
||||
r = self.rawdata
|
||||
state['_rawdata_bounds'] = list(r.byte_bounds_offset())
|
||||
if r.is_indexed:
|
||||
state['_order_list'] = r.order.tolist() # more compact serialization in python list
|
||||
else:
|
||||
state['_order_list'] = None
|
||||
state['memory_map'] = sorted([list(i) for i in self.memory_map.items()])
|
||||
state['container_offset'] = self.calc_serialized_container_offset()
|
||||
return state
|
||||
|
||||
def __setstate__(self, state):
|
||||
@ -526,86 +225,6 @@ class DefaultSegment:
|
||||
log.debug(f"moving start_addr to origin: {self.start_addr}")
|
||||
delattr(self, 'start_addr')
|
||||
|
||||
def reconstruct_raw(self, rawdata):
|
||||
"""Reconstruct the pointers to the parent data arrays
|
||||
|
||||
Each segment is a view into the primary segment's data, so those
|
||||
pointers and the order must be restored in the child segments.
|
||||
"""
|
||||
start, end = self._rawdata_bounds
|
||||
r = rawdata[start:end]
|
||||
delattr(self, '_rawdata_bounds')
|
||||
try:
|
||||
if self._order_list:
|
||||
order = to_numpy_list(self._order_list)
|
||||
r = r.get_indexed(order)
|
||||
delattr(self, '_order_list')
|
||||
except AttributeError:
|
||||
pass
|
||||
self.set_raw(r)
|
||||
|
||||
def get_parallel_raw_data(self, other):
|
||||
""" Get the raw data that is similar to the specified other segment
|
||||
"""
|
||||
start, end = other.byte_bounds_offset()
|
||||
r = self.rawdata[start:end]
|
||||
if other.rawdata.is_indexed:
|
||||
r = r.get_indexed[other.order]
|
||||
return r
|
||||
|
||||
def serialize_session(self, mdict):
|
||||
"""Save extra metadata to a dict so that it can be serialized
|
||||
|
||||
This is not saved by __getstate__ because child segments will point to
|
||||
the same data and this allows it to only be saved for the base segment.
|
||||
As well as allowing it to be pulled out of the main json so that it can
|
||||
be more easily edited by hand if desired.
|
||||
"""
|
||||
mdict["comment ranges"] = [list(a) for a in self.get_style_ranges(comment=True)]
|
||||
mdict["data ranges"] = [list(a) for a in self.get_style_ranges(data=True)]
|
||||
for i in range(1, user_bit_mask):
|
||||
r = [list(a) for a in self.get_style_ranges(user=i)]
|
||||
if r:
|
||||
slot = "user style %d" % i
|
||||
mdict[slot] = r
|
||||
|
||||
# json serialization doesn't allow int keys, so convert to list of
|
||||
# pairs
|
||||
mdict["comments"] = self.get_sorted_comments()
|
||||
|
||||
def restore_session(self, e):
|
||||
if 'comments' in e:
|
||||
for k, v in e['comments']:
|
||||
self.rawdata.extra.comments[k] = v
|
||||
if 'comment ranges' in e:
|
||||
self.set_style_ranges(e['comment ranges'], comment=True)
|
||||
if 'data ranges' in e:
|
||||
self.set_style_ranges(e['data ranges'], user=data_style)
|
||||
if 'display list ranges' in e:
|
||||
# DEPRECATED, but supported on read. Converts display list to
|
||||
# disassembly type 0 for user index 1
|
||||
self.set_style_ranges(e['display list ranges'], data=True, user=1)
|
||||
self.set_user_data(e['display list ranges'], 1, 0)
|
||||
if 'user ranges 1' in e:
|
||||
# DEPRECATED, but supported on read. Converts user extra data 0
|
||||
# (antic dl), 1 (jumpman level), and 2 (jumpman harvest) to user
|
||||
# styles 2, 3, and 4. Data is now user style 1.
|
||||
for r, val in e['user ranges 1']:
|
||||
self.set_style_ranges([r], user=val + 2)
|
||||
for i in range(1, user_bit_mask):
|
||||
slot = "user style %d" % i
|
||||
if slot in e:
|
||||
self.set_style_ranges(e[slot], user=i)
|
||||
|
||||
def __str__(self):
|
||||
if self.origin > 0:
|
||||
origin = " @ %04x" % (self.origin)
|
||||
else:
|
||||
origin = ""
|
||||
s = "%s ($%x bytes%s)" % (self.name, len(self), origin)
|
||||
if self.error:
|
||||
s += " " + self.error
|
||||
return s
|
||||
|
||||
@property
|
||||
def verbose_info(self):
|
||||
@ -618,73 +237,38 @@ class DefaultSegment:
|
||||
s += " error='%s'" % self.error
|
||||
return s
|
||||
|
||||
def __len__(self):
|
||||
return self.rawdata.data_length
|
||||
|
||||
def __getitem__(self, index):
|
||||
return self.data[index]
|
||||
|
||||
def __setitem__(self, index, value):
|
||||
self.data[index] = value
|
||||
self._search_copy = None
|
||||
|
||||
def byte_bounds_offset(self):
|
||||
"""Return start and end offsets of this segment's data into the
|
||||
base array's data
|
||||
"""
|
||||
return self.rawdata.byte_bounds_offset()
|
||||
|
||||
def is_valid_index(self, i):
|
||||
return i >= 0 and i < len(self)
|
||||
|
||||
def get_raw_index(self, i):
|
||||
"""Get index into base array's raw data, given the index into this
|
||||
segment
|
||||
"""
|
||||
return self.rawdata.get_raw_index(i)
|
||||
|
||||
def get_raw_index_from_address(self, addr):
|
||||
"""Get index into base array's raw data, given the address of a byte
|
||||
into this segment
|
||||
"""
|
||||
return self.get_raw_index(addr - self.origin)
|
||||
|
||||
def get_index_from_base_index(self, base_index):
|
||||
"""Get index into this array's data given the index into the base array
|
||||
"""
|
||||
r = self.rawdata
|
||||
try:
|
||||
index = r.get_reverse_index(base_index)
|
||||
except IndexError:
|
||||
raise IndexError("index %d not in this segment" % base_index)
|
||||
if index < 0:
|
||||
raise IndexError("index %d not in this segment" % base_index)
|
||||
return int(index)
|
||||
|
||||
def tobytes(self):
|
||||
return self.data.tobytes()
|
||||
return self.container._data[self.container_offset].tobytes()
|
||||
|
||||
def get_style_bits(self, **kwargs):
|
||||
return get_style_bits(**kwargs)
|
||||
return style_bits.get_style_bits(**kwargs)
|
||||
|
||||
def get_style_mask(self, **kwargs):
|
||||
return get_style_mask(**kwargs)
|
||||
return style_bits.get_style_mask(**kwargs)
|
||||
|
||||
def calc_source_indexes_from_ranges(self, ranges):
|
||||
source_indexes = np.zeros(len(self.container), dtype=np.uint8)
|
||||
offsets = self.container_offset
|
||||
for start, end in ranges:
|
||||
if end < start:
|
||||
start, end = end, start
|
||||
source_indexes[offsets[start:end]] = 1
|
||||
affected_source_indexes = np.where(source_indexes > 0)[0]
|
||||
return affected_source_indexes
|
||||
|
||||
def set_style_ranges(self, ranges, **kwargs):
|
||||
style_bits = self.get_style_bits(**kwargs)
|
||||
s = self.style
|
||||
for start, end in ranges:
|
||||
if end < start:
|
||||
start, end = end, start
|
||||
s[start:end] |= style_bits
|
||||
indexes = self.calc_source_indexes_from_ranges(ranges)
|
||||
self.container.set_style_at_indexes(indexes, **kwargs)
|
||||
|
||||
def clear_style_ranges(self, ranges, **kwargs):
|
||||
style_mask = self.get_style_mask(**kwargs)
|
||||
s = self.style
|
||||
for start, end in ranges:
|
||||
if end < start:
|
||||
start, end = end, start
|
||||
s[start:end] &= style_mask
|
||||
indexes = self.calc_source_indexes_from_ranges(ranges)
|
||||
self.container.clear_style_at_indexes(indexes, **kwargs)
|
||||
|
||||
def clear_style_bits(self, **kwargs):
|
||||
self.container.clear_style_at_indexes(self.container_offset, **kwargs)
|
||||
|
||||
def get_style_ranges(self, **kwargs):
|
||||
"""Return a list of start, end pairs that match the specified style
|
||||
@ -693,24 +277,6 @@ class DefaultSegment:
|
||||
matches = (self.style & style_bits) == style_bits
|
||||
return self.bool_to_ranges(matches)
|
||||
|
||||
def fixup_comments(self):
|
||||
"""Remove any style bytes that are marked as commented but have no
|
||||
comment, and add any style bytes where there's a comment but it isn't
|
||||
marked in the style data.
|
||||
|
||||
This happens on the base data, so only need to do this on one segment
|
||||
that uses this base data.
|
||||
"""
|
||||
style_base = self.rawdata.style_base
|
||||
comment_text_indexes = np.asarray(list(self.rawdata.extra.comments.keys()), dtype=np.uint32)
|
||||
comment_mask = self.get_style_mask(comment=True)
|
||||
has_comments = np.where(style_base & comment_bit_mask > 0)[0]
|
||||
both = np.intersect1d(comment_text_indexes, has_comments)
|
||||
log.info("fixup comments: %d correctly marked, %d without style, %d empty text" % (np.alen(both), np.alen(comment_text_indexes) - np.alen(both), np.alen(has_comments) - np.alen(both)))
|
||||
style_base &= comment_mask
|
||||
comment_style = self.get_style_bits(comment=True)
|
||||
style_base[comment_text_indexes] |= comment_style
|
||||
|
||||
def get_comment_locations(self, **kwargs):
|
||||
style_bits = self.get_style_bits(**kwargs)
|
||||
r = self.rawdata.copy()
|
||||
@ -719,61 +285,9 @@ class DefaultSegment:
|
||||
r.style_base[:] &= style_bits
|
||||
comment_indexes = np.asarray(list(self.rawdata.extra.comments.keys()), dtype=np.uint32)
|
||||
#print comment_indexes
|
||||
r.style_base[comment_indexes] |= comment_bit_mask
|
||||
r.style_base[comment_indexes] |= style_bits.comment_bit_mask
|
||||
return r.unindexed_style[:]
|
||||
|
||||
def get_entire_style_ranges(self, split_comments=None, **kwargs):
|
||||
"""Find sections of the segment that have the same style value.
|
||||
|
||||
The arguments to this function are used as a mask for the style to
|
||||
determine where to split the styles. Style bits that aren't included in
|
||||
the list will be ignored when splitting. The returned list covers the
|
||||
entire length of the segment.
|
||||
|
||||
Returns a list of tuples, each tuple containing two items: a start, end
|
||||
tuple; and an integer with the style value.
|
||||
"""
|
||||
style_bits = self.get_style_bits(**kwargs)
|
||||
matches = self.get_comment_locations(**kwargs)
|
||||
groups = np.split(matches, np.where(np.diff(matches) != 0)[0] + 1)
|
||||
if split_comments is None:
|
||||
split_comments = []
|
||||
# print groups
|
||||
# split into groups with the same numbers
|
||||
ranges = []
|
||||
last_end = 0
|
||||
if len(groups) == 1 and len(groups[0]) == 0:
|
||||
# check for degenerate case
|
||||
return
|
||||
last_style = -1
|
||||
for group in groups:
|
||||
# each group is guaranteed to have the same style
|
||||
size = len(group)
|
||||
next_end = last_end + size
|
||||
style = matches[last_end]
|
||||
masked_style = style & style_bits
|
||||
# print last_end, next_end, style, masked_style, size, group
|
||||
if style & comment_bit_mask:
|
||||
if masked_style in split_comments:
|
||||
# print "interesting comment", last_end, next_end
|
||||
ranges.append(((last_end, next_end), masked_style))
|
||||
else:
|
||||
# print "non-interesting comment", last_end, next_end
|
||||
if last_style == masked_style:
|
||||
((prev_end, _), _) = ranges.pop()
|
||||
ranges.append(((prev_end, next_end), masked_style))
|
||||
else:
|
||||
ranges.append(((last_end, next_end), masked_style))
|
||||
else:
|
||||
if last_style == masked_style:
|
||||
((prev_end, _), _) = ranges.pop()
|
||||
ranges.append(((prev_end, next_end), masked_style))
|
||||
else:
|
||||
ranges.append(((last_end, next_end), masked_style))
|
||||
last_style = masked_style
|
||||
last_end = next_end
|
||||
return ranges
|
||||
|
||||
def bool_to_ranges(self, matches):
|
||||
w = np.where(matches == True)[0]
|
||||
# split into groups with consecutive numbers
|
||||
@ -805,87 +319,6 @@ class DefaultSegment:
|
||||
return ranges[match_index][0]
|
||||
return None
|
||||
|
||||
def get_rect_indexes(self, anchor_start, anchor_end, bytes_per_row):
|
||||
# determine row,col of upper left and lower right of selected
|
||||
# rectangle. The values are inclusive, so ul=(0,0) and lr=(1,2)
|
||||
# is 2 rows and 3 columns. Columns need to be adjusted slightly
|
||||
# depending on quadrant of selection because anchor indexes are
|
||||
# measured as cursor positions, that is: positions between the
|
||||
# bytes where as rect select needs to think of the selections as
|
||||
# on the byte positions themselves, not in between.
|
||||
r1, c1 = divmod(anchor_start, bytes_per_row)
|
||||
r2, c2 = divmod(anchor_end, bytes_per_row)
|
||||
if c1 >= c2:
|
||||
# start column is to the right of the end column so columns
|
||||
# need to be swapped
|
||||
if r1 >= r2:
|
||||
# start row is below end row, so rows swapped as well
|
||||
c1, c2 = c2, c1 + 1
|
||||
r1, r2 = r2, r1
|
||||
elif c2 == 0:
|
||||
# When the cursor is at the end of a line, anchor_end points
|
||||
# to the first character of the next line. Handle this
|
||||
# special case by pointing to end of the previous line.
|
||||
c2 = bytes_per_row
|
||||
r2 -= 1
|
||||
else:
|
||||
c1, c2 = c2 - 1, c1 + 1
|
||||
else:
|
||||
# start column is to the left of the end column, so don't need
|
||||
# to swap columns
|
||||
if r1 > r2:
|
||||
# start row is below end row
|
||||
r1, r2 = r2, r1
|
||||
c2 += 1
|
||||
anchor_start = r1 * bytes_per_row + c1
|
||||
anchor_end = r2 * bytes_per_row + c2
|
||||
r2 += 1
|
||||
return anchor_start, anchor_end, (r1, c1), (r2, c2)
|
||||
|
||||
def set_style_ranges_rect(self, ranges, bytes_per_row, **kwargs):
|
||||
style_bits = self.get_style_bits(**kwargs)
|
||||
s = self.style
|
||||
for start, end in ranges:
|
||||
start, end, (r1, c1), (r2, c2) = self.get_rect_indexes(start, end, bytes_per_row)
|
||||
# Numpy tricks!
|
||||
# >>> c1 = 15
|
||||
# >>> r = 4 # r2 - r1
|
||||
# >>> c = 10 # c2 - c1
|
||||
# >>> width = 40
|
||||
# >>> np.arange(c)
|
||||
#array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
|
||||
# >>> np.arange(r) * width
|
||||
#array([ 0, 40, 80, 120])
|
||||
# >>> np.tile(np.arange(c), r) + np.repeat(np.arange(r)*width, c)
|
||||
#array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 40, 41, 42,
|
||||
# 43, 44, 45, 46, 47, 48, 49, 80, 81, 82, 83, 84, 85,
|
||||
# 86, 87, 88, 89, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129])
|
||||
# >>> np.tile(np.arange(c), r) + np.repeat(np.arange(r)*width, c) + c1
|
||||
#array([ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 55, 56, 57,
|
||||
# 58, 59, 60, 61, 62, 63, 64, 95, 96, 97, 98, 99, 100,
|
||||
# 101, 102, 103, 104, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144])
|
||||
r = r2 - r1
|
||||
c = c2 - c1
|
||||
indexes = np.tile(np.arange(c), r) + np.repeat(np.arange(r) * bytes_per_row, c) + start
|
||||
|
||||
# Limit the indexes actually used to the size of the array, because
|
||||
# if the region has an incomplete last line, the style setting
|
||||
# would fail because it isn't be a perfect rectangle
|
||||
clamped = indexes[np.where(np.less(indexes, len(self)))[0]]
|
||||
s[clamped] |= style_bits
|
||||
|
||||
def rects_to_ranges(self, rects, bytes_per_row):
|
||||
ranges = []
|
||||
for (r1, c1), (r2, c2) in rects:
|
||||
start = r1 * bytes_per_row + c1
|
||||
end = (r2 - 1) * bytes_per_row + c2
|
||||
ranges.append((start, end))
|
||||
return ranges
|
||||
|
||||
def clear_style_bits(self, **kwargs):
|
||||
style_mask = self.get_style_mask(**kwargs)
|
||||
self.style &= style_mask
|
||||
|
||||
def set_user_data(self, ranges, user_index, user_data):
|
||||
for start, end in ranges:
|
||||
# FIXME: this is slow
|
||||
@ -919,13 +352,6 @@ class DefaultSegment:
|
||||
ranges.append([[start, end], current])
|
||||
return ranges
|
||||
|
||||
def get_style_at_indexes(self, indexes):
|
||||
return self.style[indexes]
|
||||
|
||||
def set_style_at_indexes(self, indexes, **kwargs):
|
||||
style_bits = self.get_style_bits(**kwargs)
|
||||
self.style[indexes] |= style_bits
|
||||
|
||||
def remove_comments_at_indexes(self, indexes):
|
||||
for where_index in indexes:
|
||||
self.remove_comment(where_index)
|
||||
@ -947,7 +373,7 @@ class DefaultSegment:
|
||||
def get_comments_at_indexes(self, indexes):
|
||||
"""Get a list of comments at specified indexes"""
|
||||
s = self.style[indexes]
|
||||
has_comments = np.where(s & comment_bit_mask > 0)[0]
|
||||
has_comments = np.where(s & style_bits.comment_bit_mask > 0)[0]
|
||||
comments = []
|
||||
for where_index in has_comments:
|
||||
raw = self.get_raw_index(indexes[where_index])
|
||||
@ -1083,8 +509,8 @@ class DefaultSegment:
|
||||
def compare_segment(self, other_segment):
|
||||
self.clear_style_bits(diff=True)
|
||||
diff = self.rawdata.data != other_segment.rawdata.data
|
||||
d = diff * np.uint8(diff_bit_mask)
|
||||
self.style |= (diff * np.uint8(diff_bit_mask))
|
||||
d = diff * np.uint8(style_bits.diff_bit_mask)
|
||||
self.style |= (diff * np.uint8(style_bits.diff_bit_mask))
|
||||
log.debug("compare_segment: # entries %d, # diffs: %d" % (len(diff), len(np.where(diff == True)[0])))
|
||||
|
||||
|
||||
|
51
atrcopy/style_bits.py
Normal file
51
atrcopy/style_bits.py
Normal file
@ -0,0 +1,51 @@
|
||||
import numpy as np
|
||||
|
||||
import logging
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
user_bit_mask = 0x07
|
||||
data_style = 0x1
|
||||
not_user_bit_mask = 0xff ^ user_bit_mask
|
||||
diff_bit_mask = 0x10
|
||||
match_bit_mask = 0x20
|
||||
comment_bit_mask = 0x40
|
||||
selected_bit_mask = 0x80
|
||||
|
||||
|
||||
def get_style_bits(match=False, comment=False, selected=False, data=False, diff=False, user=0):
|
||||
""" Return an int value that contains the specified style bits set.
|
||||
|
||||
Available styles for each byte are:
|
||||
|
||||
match: part of the currently matched search
|
||||
comment: user commented area
|
||||
selected: selected region
|
||||
data: labeled in the disassembler as a data region (i.e. not disassembled)
|
||||
"""
|
||||
style_bits = 0
|
||||
if user:
|
||||
style_bits |= (user & user_bit_mask)
|
||||
if diff:
|
||||
style_bits |= diff_bit_mask
|
||||
if match:
|
||||
style_bits |= match_bit_mask
|
||||
if comment:
|
||||
style_bits |= comment_bit_mask
|
||||
if data:
|
||||
style_bits |= (data_style & user_bit_mask)
|
||||
if selected:
|
||||
style_bits |= selected_bit_mask
|
||||
return style_bits
|
||||
|
||||
|
||||
def get_style_mask(**kwargs):
|
||||
"""Get the bit mask that, when anded with data, will turn off the
|
||||
selected bits
|
||||
"""
|
||||
bits = get_style_bits(**kwargs)
|
||||
if 'user' in kwargs and kwargs['user']:
|
||||
bits |= user_bit_mask
|
||||
else:
|
||||
bits &= (0xff ^ user_bit_mask)
|
||||
return 0xff ^ bits
|
13
setup.py
13
setup.py
@ -24,7 +24,18 @@ setup(name="atrcopy",
|
||||
packages=["atrcopy"],
|
||||
include_package_data=True,
|
||||
scripts=scripts,
|
||||
entry_points={"sawx.loaders": 'atrcopy = atrcopy.omnivore_loader'},
|
||||
entry_points={
|
||||
"sawx.loaders": [
|
||||
'atrcopy = atrcopy.omnivore_loader',
|
||||
],
|
||||
|
||||
"atrcopy.containers": [
|
||||
'gzip = atrcopy.containers.gzip',
|
||||
'bzip = atrcopy.containers.bzip',
|
||||
'lzma = atrcopy.containers.lzma',
|
||||
'dcm = atrcopy.containers.dcm',
|
||||
],
|
||||
},
|
||||
description="Utility to manage file systems on Atari 8-bit (DOS 2) and Apple ][ (DOS 3.3) disk images.",
|
||||
long_description=long_description,
|
||||
license="GPL",
|
||||
|
@ -4,7 +4,9 @@ import numpy as np
|
||||
|
||||
from mock import *
|
||||
|
||||
from atrcopy import SegmentData, iter_parsers
|
||||
from atrcopy.container import guess_container
|
||||
from atrcopy.parser import iter_parsers
|
||||
from atrcopy import get_xex, interleave_segments, user_bit_mask, diff_bit_mask
|
||||
from atrcopy import errors
|
||||
|
||||
|
||||
@ -20,8 +22,8 @@ class BaseContainerTest:
|
||||
except OSError:
|
||||
pass
|
||||
else:
|
||||
rawdata = SegmentData(sample_data.copy())
|
||||
mime, parser = iter_parsers(rawdata)
|
||||
container = guess_container(sample_data)
|
||||
mime, parser = iter_parsers(container)
|
||||
assert mime == self.expected_mime
|
||||
assert len(parser.image.files) == self.num_files_in_sample
|
||||
|
||||
|
@ -7,7 +7,8 @@ import os
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from atrcopy.indexed_segment import SourceSegment, IndexedSegment
|
||||
from atrcopy.container import DiskImageContainer
|
||||
from atrcopy.segments import DefaultSegment
|
||||
from atrcopy import get_xex, interleave_segments, user_bit_mask, diff_bit_mask
|
||||
from atrcopy import errors
|
||||
from functools import reduce
|
||||
@ -72,34 +73,34 @@ class TestIndexed:
|
||||
def setup(self):
|
||||
data = np.arange(4096, dtype=np.uint8)
|
||||
data[1::2] = np.repeat(np.arange(16, dtype=np.uint8), 128)
|
||||
self.source = SourceSegment(data)
|
||||
self.segment = IndexedSegment(self.source, 0, length=len(self.source))
|
||||
self.container = DiskImageContainer(data)
|
||||
self.segment = DefaultSegment(self.container, 0, length=len(self.container))
|
||||
|
||||
def test_offsets(self):
|
||||
assert np.array_equal(self.segment.offset_into_source, np.arange(len(self.source)))
|
||||
assert np.array_equal(self.segment.container_offset, np.arange(len(self.container)))
|
||||
|
||||
def test_subset(self):
|
||||
# get indexed, will result in every 3th byte
|
||||
s, indexes = get_indexed(self.segment, 256, 3)
|
||||
assert np.array_equal(s.offset_into_source, indexes)
|
||||
assert np.array_equal(s.container_offset, indexes)
|
||||
for i in range(len(indexes)):
|
||||
index_in_source = i * 3
|
||||
assert np.array_equal(s.offset_into_source[i], index_in_source)
|
||||
assert np.array_equal(s.container_offset[i], index_in_source)
|
||||
s[i] = 33
|
||||
assert s[i] == self.source[index_in_source]
|
||||
self.source[index_in_source] = 3
|
||||
assert s[i] == self.source[index_in_source]
|
||||
assert s[i] == self.container[index_in_source]
|
||||
self.container[index_in_source] = 3
|
||||
assert s[i] == self.container[index_in_source]
|
||||
|
||||
# get indexed into indexed, will result in every 9th byte
|
||||
s2, indexes2 = get_indexed(s, 64, 3)
|
||||
assert np.array_equal(s2.offset_into_source, indexes2 * 3)
|
||||
assert np.array_equal(s2.container_offset, indexes2 * 3)
|
||||
for i in range(len(indexes2)):
|
||||
index_in_source = i * 9
|
||||
assert np.array_equal(s2.offset_into_source[i], index_in_source)
|
||||
assert np.array_equal(s2.container_offset[i], index_in_source)
|
||||
s2[i] = 99
|
||||
assert s2[i] == self.source[index_in_source]
|
||||
self.source[index_in_source] = 9
|
||||
assert s2[i] == self.source[index_in_source]
|
||||
assert s2[i] == self.container[index_in_source]
|
||||
self.container[index_in_source] = 9
|
||||
assert s2[i] == self.container[index_in_source]
|
||||
|
||||
# def test_indexed_sub(self):
|
||||
# base = self.segment
|
||||
|
Loading…
x
Reference in New Issue
Block a user