mirror of
https://github.com/robmcmullen/atrcopy.git
synced 2024-06-11 13:29:33 +00:00
f1b0f5ebac
* added functions to copy a segment & its base so comments can be marked on the copy of the base and show up in the copy of the segment
971 lines
35 KiB
Python
971 lines
35 KiB
Python
import bisect
|
|
import cStringIO
|
|
import uuid
|
|
|
|
import numpy as np
|
|
|
|
from errors import *
|
|
from utils import to_numpy, to_numpy_list
|
|
|
|
# Layout of the per-byte style value used throughout this module.

# The low three bits hold a user style number.
user_bit_mask = 0x07
# User style number that get_style_bits() sets when called with data=True.
data_style = 0x1
# Every bit of a style byte that is not part of the user style number.
not_user_bit_mask = 0xff ^ user_bit_mask
# Single-bit flags stored above the user style number.
diff_bit_mask = 0x10
match_bit_mask = 0x20
comment_bit_mask = 0x40
selected_bit_mask = 0x80
|
|
|
|
def get_style_bits(match=False, comment=False, selected=False, data=False, diff=False, user=0):
    """Combine the requested style flags into a single int.

    Every truthy argument ORs its bits into the result:

    match: part of the currently matched search
    comment: user commented area
    selected: selected region
    data: labeled in the disassembler as a data region (i.e. not
        disassembled); sets the user style number ``data_style``
    diff: sets ``diff_bit_mask``
    user: user style number; only the bits inside ``user_bit_mask`` are kept
    """
    style_bits = (user & user_bit_mask) if user else 0
    if data:
        style_bits |= (data_style & user_bit_mask)
    flag_masks = (
        (diff, diff_bit_mask),
        (match, match_bit_mask),
        (comment, comment_bit_mask),
        (selected, selected_bit_mask),
    )
    for requested, mask in flag_masks:
        if requested:
            style_bits |= mask
    return style_bits
|
|
|
|
def get_style_mask(**kwargs):
    """Return the mask that clears the given style bits when ANDed with a
    style value.

    Accepts the same keyword arguments as get_style_bits.  If a truthy
    ``user`` is passed, the whole user style field is cleared by the mask;
    otherwise the user style field is left untouched by the mask.
    """
    to_clear = get_style_bits(**kwargs)
    if kwargs.get('user'):
        to_clear = to_clear | user_bit_mask
    else:
        to_clear = to_clear & (0xff ^ user_bit_mask)
    return to_clear ^ 0xff
|
|
|
|
|
|
class SegmentSaver(object):
    """Exporter that emits a segment's bytes exactly as ``tostring()``
    returns them.
    """
    export_data_name = "Raw Data"
    export_extensions = [".dat"]

    @classmethod
    def encode_data(cls, segment, ui_control):
        """Return the bytes to write for ``segment``.

        ``ui_control`` is accepted but not used by this saver.
        """
        encoded = segment.tostring()
        return encoded
|
|
|
|
|
|
class OrderWrapper(object):
    """Wrapper for numpy data so that manipulations can use normal numpy syntax
    and still affect the data according to the byte ordering.

    Numpy's fancy indexing returns a copy, so it can't be used on its own
    to write values back.  This intermediate layer defines __setitem__ (and
    __iand__) so that writes are routed through the byte ordering into the
    wrapped array.
    """
    def __init__(self, data, byte_order):
        """Wrap ``data`` so that element ``i`` of the wrapper refers to
        ``data[byte_order[i]]``.
        """
        self.np_data = data
        self.base = data.base  # base array for numpy bounds determination
        self.order = byte_order

    def __len__(self):
        # np.alen was removed from NumPy; len() is equivalent for the 1-D
        # index sequences used as the order.
        return len(self.order)

    def __and__(self, other):
        """Return a new array: the ordered data ANDed with ``other``."""
        return self.np_data[self.order] & other

    def __iand__(self, other):
        """AND ``other`` into the wrapped array at the ordered positions."""
        self.np_data[self.order] &= other
        return self

    def __getitem__(self, index):
        return self.np_data[self.order[index]]

    def __setitem__(self, index, value):
        self.np_data[self.order[index]] = value

    def sub_index(self, index):
        """Return index of index so it can be used directly in a new
        SegmentData object, rather than propagating multiple index lookups by
        constructing a new OrderWrapper that calls parent OrderWrapper objects.
        """
        return self.order[index]

    @property
    def shape(self):
        """One-element tuple holding the number of ordered items."""
        return (len(self),)

    @property
    def unindexed(self):
        """Copy of the wrapped data arranged in wrapper order."""
        return self.np_data[self.order]

    def tostring(self):
        """Return the ordered data as a byte string."""
        # ndarray.tostring() is a deprecated alias of tobytes().
        return self.np_data[self.order].tobytes()
|
|
|
|
class UserExtraData(object):
    """Container for comments and per-user-style data dictionaries."""
    def __init__(self):
        # Other code in this module keys comments by index into the base
        # array.
        self.comments = {}
        # One dict per user style number in range(1, user_bit_mask).
        self.user_data = dict((style_num, {}) for style_num in range(1, user_bit_mask))
|
|
|
|
|
|
class SegmentData(object):
|
|
def __init__(self, data, style=None, extra=None, debug=False, order=None):
    """Storage for raw data

    order is a list into the base array's data; each item in the list is an
    index of the base array. E.g. if the base array is the 20 element list
    containing the data [100, 101, ... 119] and the order is [10, 0, 5, 2],
    the segment data used is [110, 100, 105, 102]

    When no style array is given one is created: zeros by default, or
    0, 1, 2, ... (as uint8) when ``debug`` is true.  When ``extra`` is None
    a fresh UserExtraData is created.
    """
    self.order = order
    self.is_indexed = order is not None
    # self.data must be assigned first because len(self) below relies on it
    self.data = OrderWrapper(data, order) if self.is_indexed else to_numpy(data)
    if style is not None:
        self.style = OrderWrapper(style, order) if self.is_indexed else style
    elif debug:
        self.style = np.arange(len(self), dtype=np.uint8)
    else:
        self.style = np.zeros(len(self), dtype=np.uint8)
    self.extra = UserExtraData() if extra is None else extra
    self.reverse_index_mapping = None
|
|
|
|
def __str__(self):
|
|
return "SegmentData id=%x indexed=%s data=%s" % (id(self), self.is_indexed, type(self.data))
|
|
|
|
def __len__(self):
|
|
return len(self.data)
|
|
|
|
@property
def stringio(self):
    """A cStringIO.StringIO built from ``self.data[:]``."""
    return cStringIO.StringIO(self.data[:])
|
|
|
|
@property
|
|
def data_base(self):
|
|
return self.data.base if self.data.base is not None else self.data
|
|
|
|
@property
|
|
def style_base(self):
|
|
return self.style.base if self.style.base is not None else self.style
|
|
|
|
def get_data(self):
|
|
return self.data
|
|
|
|
def get_style(self):
|
|
return self.style
|
|
|
|
@property
|
|
def unindexed_view(self):
|
|
if self.is_indexed:
|
|
return self.data.unindexed
|
|
return self.data
|
|
|
|
def byte_bounds_offset(self):
|
|
"""Return start and end offsets of this segment's data into the
|
|
base array's data.
|
|
|
|
This ignores the byte order index. Arrays using the byte order index
|
|
will have the entire base array's raw data.
|
|
"""
|
|
if self.data.base is None:
|
|
if self.is_indexed:
|
|
basearray = self.data.np_data
|
|
else:
|
|
basearray = self.data
|
|
return 0, len(basearray)
|
|
data_start, data_end = np.byte_bounds(self.data)
|
|
base_start, base_end = np.byte_bounds(self.data.base)
|
|
return int(data_start - base_start), int(data_end - base_start)
|
|
|
|
def get_raw_index(self, i):
|
|
"""Get index into base array's raw data, given the index into this
|
|
segment
|
|
"""
|
|
if self.is_indexed:
|
|
return int(self.order[i])
|
|
if self.data.base is None:
|
|
return int(i)
|
|
data_start, data_end = np.byte_bounds(self.data)
|
|
base_start, base_end = np.byte_bounds(self.data.base)
|
|
return int(data_start - base_start + i)
|
|
|
|
def get_indexes_from_base(self):
|
|
"""Get array of indexes from the base array, as if this raw data were
|
|
indexed.
|
|
"""
|
|
if self.is_indexed:
|
|
return np.copy(self.order[i])
|
|
if self.data.base is None:
|
|
i = 0
|
|
else:
|
|
i = self.get_raw_index(0)
|
|
return np.arange(i, i + len(self), dtype=np.uint32)
|
|
|
|
def __getitem__(self, index):
    """Return a new SegmentData for ``index`` that shares this object's
    ``extra``.

    Indexed segments keep the full underlying arrays and narrow the order
    instead; unindexed segments index the data and style arrays directly.
    """
    if self.is_indexed:
        sub_order = self.data.sub_index(index)
        return SegmentData(self.data.np_data, self.style.np_data, self.extra, order=sub_order)
    return SegmentData(self.data[index], self.style[index], self.extra, order=None)
|
|
|
|
def copy(self):
    """Return a new SegmentData backed by copies of this segment's arrays.

    When this segment is a slice of a base array, the whole base is copied
    and the result is the equivalent slice of that copy, so the copy still
    has a base array behind it.  The ``extra`` object is not carried over;
    the copy gets its own.
    """
    if self.is_indexed:
        d = self.data.np_data.copy()
        s = self.style.np_data.copy()
        duplicate = SegmentData(d, s, order=self.order)
    elif self.data.base is None:
        # if there is no base array, we aren't looking at a slice so we
        # must be copying the entire array.
        d = self.data.copy()
        s = self.style.copy()
        duplicate = SegmentData(d, s)
    else:
        d = self.data.base.copy()
        s = self.style.base.copy()
        start, end = self.byte_bounds_offset()
        duplicate = SegmentData(d[start:end], s[start:end])
    return duplicate
|
|
|
|
def get_bases(self):
|
|
if self.data.base is None:
|
|
data_base = self.data
|
|
style_base = self.style
|
|
else:
|
|
data_base = self.data.base
|
|
style_base = self.style.base
|
|
return data_base, style_base
|
|
|
|
def get_indexed(self, index):
    """Return an indexed SegmentData selecting the items at ``index``.

    ``index`` is converted with to_numpy_list.  For an unindexed segment
    the result is built on the base arrays, with the order shifted so it
    is relative to the base.  Indexing raises IndexError for indexes
    outside this segment.
    """
    index = to_numpy_list(index)
    if self.is_indexed:
        return self[index]

    # evaluated only for its IndexError when an index is out of range
    self.data[index]

    # index needs to be relative to the base array
    order = index + self.get_raw_index(0)
    data_base, style_base = self.get_bases()
    return SegmentData(data_base, style_base, self.extra, order=order)
|
|
|
|
def get_reverse_index(self, base_index):
|
|
"""Get index into this segment's data given the index into the base data
|
|
|
|
Raises IndexError if the base index doesn't map to anything in this
|
|
segment's data
|
|
"""
|
|
if self.is_indexed:
|
|
if not self.reverse_index_mapping:
|
|
self.reverse_index_mapping = dict([(k,i) for i,k in enumerate(self.order)])
|
|
try:
|
|
return self.reverse_index_mapping[base_index]
|
|
except KeyError:
|
|
raise IndexError("index %d not mapped in this segment" % base_index)
|
|
else:
|
|
if self.data.base is None:
|
|
return int(base_index)
|
|
data_start, data_end = np.byte_bounds(self.data)
|
|
base_start, base_end = np.byte_bounds(self.data.base)
|
|
return int(base_start + base_index - data_start)
|
|
|
|
|
|
class DefaultSegment(object):
|
|
savers = [SegmentSaver]
|
|
|
|
def __init__(self, rawdata, start_addr=0, name="All", error=None, verbose_name=None):
    """Create a segment over ``rawdata``.

    ``start_addr`` is the address assigned to the first byte; label() and
    __str__ add it to indexes when formatting.  ``name``, ``verbose_name``
    and ``error`` are stored as given and used when describing the segment.
    """
    # force python int to decouple from possibly being a numpy datatype
    self.start_addr = int(start_addr)
    self.set_raw(rawdata)
    self.name = name
    self.verbose_name = verbose_name
    self.error = error
    self.page_size = -1
    self.map_width = 40
    self.uuid = str(uuid.uuid4())
|
|
|
|
def set_raw(self, rawdata):
    """Attach ``rawdata`` and cache its data and style objects.

    Also drops the cached search copy.
    """
    self.rawdata = rawdata
    self.data, self.style = rawdata.get_data(), rawdata.get_style()
    self._search_copy = None
|
|
|
|
def get_raw(self):
|
|
return self.rawdata
|
|
|
|
def __getstate__(self):
    """Return the dict of state to serialize.

    The raw data itself is not included.  It is described by its byte
    bounds within the base array ('_rawdata_bounds') and, for indexed
    data, by its order ('_order_list', otherwise None).
    """
    saved_attrs = ('start_addr', 'error', 'name', 'verbose_name', 'page_size', 'map_width', 'uuid')
    state = dict((key, getattr(self, key)) for key in saved_attrs)
    raw = self.rawdata
    state['_rawdata_bounds'] = list(raw.byte_bounds_offset())
    # more compact serialization in python list
    state['_order_list'] = raw.order.tolist() if raw.is_indexed else None
    return state
|
|
|
|
def reconstruct_missing(self):
|
|
"""Any instance attributes set in __init__, but added after some save
|
|
files exist in the wild should be checked for and given default values
|
|
if not present.
|
|
|
|
The use of jsonpickle to recreate objects doesn't go through __init__,
|
|
so newer attributes won't exist.
|
|
"""
|
|
if not hasattr(self, 'uuid'):
|
|
self.uuid = str(uuid.uuid4())
|
|
|
|
def reconstruct_raw(self, rawdata):
    """Re-attach raw data after deserialization.

    Uses the temporary attributes written by __getstate__:
    ``_rawdata_bounds`` gives the slice of ``rawdata`` to use, and
    ``_order_list``, when present and non-empty, gives the order of an
    indexed segment.  Both temporary attributes are removed, then any
    attributes missing from older save files are filled in.
    """
    start, end = self._rawdata_bounds
    r = rawdata[start:end]
    delattr(self, '_rawdata_bounds')
    # The order list may be absent altogether.  Look it up explicitly
    # instead of wrapping the work in ``except AttributeError``, which would
    # also hide AttributeErrors raised inside get_indexed.
    order_list = getattr(self, '_order_list', None)
    if order_list:
        r = r.get_indexed(to_numpy_list(order_list))
    if hasattr(self, '_order_list'):
        delattr(self, '_order_list')
    self.set_raw(r)
    self.reconstruct_missing()
|
|
|
|
def get_parallel_raw_data(self, other):
    """ Get the raw data that is similar to the specified other segment

    ``other`` is a segment.  The result is the slice of this segment's raw
    data at the other segment's byte bounds, further indexed by the other
    segment's order when that segment is indexed.
    """
    start, end = other.byte_bounds_offset()
    r = self.rawdata[start:end]
    if other.rawdata.is_indexed:
        # get_indexed is a method and the order lives on the raw data, not
        # on the segment.  The previous code subscripted the method with a
        # segment attribute that does not exist.
        r = r.get_indexed(other.rawdata.order)
    return r
|
|
|
|
def serialize_extra_to_dict(self, mdict):
    """Save extra metadata to a dict so that it can be serialized

    This is not saved by __getstate__ because child segments will point to
    the same data and this allows it to only be saved for the base segment.
    As well as allowing it to be pulled out of the main json so that it can
    be more easily edited by hand if desired.
    """
    def as_lists(pairs):
        return [list(pair) for pair in pairs]

    mdict["comment ranges"] = as_lists(self.get_style_ranges(comment=True))
    mdict["data ranges"] = as_lists(self.get_style_ranges(data=True))
    for style_num in range(1, user_bit_mask):
        found = as_lists(self.get_style_ranges(user=style_num))
        if found:
            mdict["user style %d" % style_num] = found

    # json serialization doesn't allow int keys, so comments are stored as
    # a list of pairs
    mdict["comments"] = self.get_sorted_comments()
|
|
|
|
def restore_extra_from_dict(self, e):
    """Restore comments and style ranges from a dict of saved metadata.

    Keys that are absent from ``e`` are skipped.  Two deprecated keys are
    still understood on read.
    """
    comments = self.rawdata.extra.comments
    for where, text in e.get('comments', ()):
        comments[where] = text
    if 'comment ranges' in e:
        self.set_style_ranges(e['comment ranges'], comment=True)
    if 'data ranges' in e:
        self.set_style_ranges(e['data ranges'], user=data_style)
    if 'display list ranges' in e:
        # DEPRECATED, but supported on read. Converts display list to
        # disassembly type 0 for user index 1
        display_list = e['display list ranges']
        self.set_style_ranges(display_list, data=True, user=1)
        self.set_user_data(display_list, 1, 0)
    # DEPRECATED, but supported on read. Converts user extra data 0
    # (antic dl), 1 (jumpman level), and 2 (jumpman harvest) to user
    # styles 2, 3, and 4. Data is now user style 1.
    for old_range, val in e.get('user ranges 1', ()):
        self.set_style_ranges([old_range], user=val + 2)
    for style_num in range(1, user_bit_mask):
        slot = "user style %d" % style_num
        if slot in e:
            self.set_style_ranges(e[slot], user=style_num)
|
|
|
|
|
|
def __str__(self):
|
|
if self.start_addr > 0:
|
|
origin = " @ %04x" % (self.start_addr)
|
|
else:
|
|
origin = ""
|
|
s = "%s ($%x bytes%s)" % (self.name, len(self), origin)
|
|
if self.error:
|
|
s += " " + self.error
|
|
return s
|
|
|
|
@property
def verbose_info(self):
    """Longer description that prefers ``verbose_name`` over ``name``.

    For indexed raw data it also reports the first entry of the order.
    """
    name = self.verbose_name or self.name
    raw = self.rawdata
    if raw.is_indexed:
        text = "%s ($%04x bytes) non-contiguous file; file index of first byte: $%04x" % (name, len(self), raw.order[0])
    else:
        text = "%s ($%04x bytes)" % (name, len(self))
    if self.error:
        text = text + " error='%s'" % self.error
    return text
|
|
|
|
def __len__(self):
|
|
return len(self.rawdata)
|
|
|
|
def __getitem__(self, index):
|
|
return self.data[index]
|
|
|
|
def __setitem__(self, index, value):
|
|
self.data[index] = value
|
|
self._search_copy = None
|
|
|
|
def byte_bounds_offset(self):
    """Return start and end offsets of this segment's data into the
    base array's data, as reported by the raw data object.
    """
    bounds = self.rawdata.byte_bounds_offset()
    return bounds
|
|
|
|
def is_valid_index(self, i):
|
|
return i >= 0 and i < len(self)
|
|
|
|
def get_raw_index(self, i):
    """Get index into base array's raw data, given the index into this
    segment
    """
    raw_index = self.rawdata.get_raw_index(i)
    return raw_index
|
|
|
|
def get_index_from_base_index(self, base_index):
    """Get index into this array's data given the index into the base array

    Raises IndexError when the result is not a valid index of this segment.
    """
    raw = self.rawdata
    if raw.is_indexed:
        local = raw.get_reverse_index(base_index)
    else:
        local = base_index - raw.get_raw_index(0)
    if self.is_valid_index(local):
        return local
    raise IndexError("index %d not in this segment" % base_index)
|
|
|
|
def tostring(self):
    """Return the segment's data as returned by the data object's
    ``tostring()``.
    """
    raw_bytes = self.data.tostring()
    return raw_bytes
|
|
|
|
def get_style_bits(self, **kwargs):
    """Method form of the module-level get_style_bits; same keywords."""
    bits = get_style_bits(**kwargs)
    return bits
|
|
|
|
def get_style_mask(self, **kwargs):
    """Method form of the module-level get_style_mask; same keywords."""
    mask = get_style_mask(**kwargs)
    return mask
|
|
|
|
def set_style_ranges(self, ranges, **kwargs):
    """OR the style bits selected by ``kwargs`` into every (start, end)
    range.  A range given in reverse order is swapped first.
    """
    style_bits = self.get_style_bits(**kwargs)
    style = self.style
    for first, second in ranges:
        low, high = (second, first) if second < first else (first, second)
        style[low:high] |= style_bits
|
|
|
|
def clear_style_ranges(self, ranges, **kwargs):
    """Clear the style bits selected by ``kwargs`` in every (start, end)
    range.  A range given in reverse order is swapped first.
    """
    style_mask = self.get_style_mask(**kwargs)
    style = self.style
    for first, second in ranges:
        low, high = (second, first) if second < first else (first, second)
        style[low:high] &= style_mask
|
|
|
|
def get_style_ranges(self, **kwargs):
    """Return a list of start, end pairs that match the specified style

    A position matches when all of the requested style bits are set there.
    """
    wanted = self.get_style_bits(**kwargs)
    has_all_bits = (self.style & wanted) == wanted
    return self.bool_to_ranges(has_all_bits)
|
|
|
|
def get_comment_locations(self, **kwargs):
    """Return a style array for this segment with the comment bit set at
    the positions that have a comment.

    The work is done on a copy of the raw data, so the segment's own style
    is not modified.  The copy's style base is reduced to the style bits
    selected by ``kwargs``, then ``comment_bit_mask`` is set at every index
    that has an entry in the comments dict.
    """
    style_bits = self.get_style_bits(**kwargs)
    r = self.rawdata.copy()
    base = r.style_base
    # Modify the copied base in place so the change shows up in r.style.
    # The previous code masked into a temporary array, so neither the mask
    # nor the comment bits ever reached the returned style.
    base &= style_bits
    comment_indexes = np.asarray(list(self.rawdata.extra.comments.keys()), dtype=np.uint32)
    # NOTE(review): comment keys are base-array indexes; for indexed raw
    # data the copy's style base is an order wrapper, so this lookup would
    # go through the order -- confirm behaviour for indexed segments.
    base[comment_indexes] |= comment_bit_mask
    return r.style
|
|
|
|
def get_entire_style_ranges(self, split_comments=None, **kwargs):
    """Find sections of the segment that have the same style value.

    The arguments to this function are used as a mask for the style to
    determine where to split the styles. Style bits that aren't included in
    the list will be ignored when splitting. The returned list covers the
    entire length of the segment.

    ``split_comments`` is a collection of masked style values; a group
    that carries the comment bit and whose masked style is in this
    collection always starts a new range instead of being merged into the
    previous one.  It defaults to an empty collection.

    Returns a list of tuples, each tuple containing two items: a start, end
    tuple; and an integer with the style value.  An empty segment yields an
    empty list.
    """
    if split_comments is None:
        # the default previously reached the ``in`` test below as None and
        # raised TypeError
        split_comments = []
    style_bits = self.get_style_bits(**kwargs)
    matches = self.get_comment_locations(**kwargs)
    # split into groups with the same numbers
    groups = np.split(matches, np.where(np.diff(matches) != 0)[0] + 1)
    ranges = []
    if len(groups) == 1 and len(groups[0]) == 0:
        # degenerate case: nothing to report (was a bare ``return``)
        return ranges
    last_end = 0
    last_style = -1
    for group in groups:
        # each group is guaranteed to have the same style
        next_end = last_end + len(group)
        style = matches[last_end]
        masked_style = style & style_bits
        force_split = (style & comment_bit_mask) and masked_style in split_comments
        if not force_split and last_style == masked_style:
            # same masked style as the previous range: extend that range
            ((prev_start, _), _) = ranges.pop()
            ranges.append(((prev_start, next_end), masked_style))
        else:
            ranges.append(((last_end, next_end), masked_style))
        last_style = masked_style
        last_end = next_end
    return ranges
|
|
|
|
def bool_to_ranges(self, matches):
    """Convert a boolean array into a list of (start, end) pairs, one for
    each run of consecutive True values.  ``end`` is exclusive.
    """
    w = np.where(matches == True)[0]
    # split into groups with consecutive numbers
    groups = np.split(w, np.where(np.diff(w) != 1)[0] + 1)
    # np.alen was removed from NumPy; len() is equivalent for these 1-D
    # groups.
    return [(int(group[0]), int(group[-1]) + 1) for group in groups if len(group) > 0]
|
|
|
|
def find_next(self, index, **kwargs):
    """Return the start of the styled range found by bisecting the matching
    ranges to the right of ``(index + 1, 0)``, wrapping to the first range
    when there is none.

    Returns None when no range matches the style selected by ``kwargs``.
    """
    ranges = self.get_style_ranges(**kwargs)
    if not ranges:
        return None
    pos = bisect.bisect_right(ranges, (index + 1, 0))
    if pos >= len(ranges):
        pos = 0
    return ranges[pos][0]
|
|
|
|
def find_previous(self, index, **kwargs):
    """Return the start of the styled range just before the bisect-left
    position of ``(index - 1, 0)`` in the matching ranges, wrapping to the
    last range when there is none.

    Returns None when no range matches the style selected by ``kwargs``.
    """
    ranges = self.get_style_ranges(**kwargs)
    if not ranges:
        return None
    pos = bisect.bisect_left(ranges, (index - 1, 0)) - 1
    if pos < 0:
        pos = len(ranges) - 1
    return ranges[pos][0]
|
|
|
|
def get_rect_indexes(self, anchor_start, anchor_end):
    """Normalize two anchor indexes into a rectangle over the byte map.

    Rows and columns come from dividing each anchor by ``self.map_width``.

    Returns a 4-tuple: the recomputed start index, the recomputed end
    index, the (row, col) of the upper left corner, and the (row, col)
    pair for the lower right corner after the row has been incremented by
    one.
    """
    # determine row,col of upper left and lower right of selected
    # rectangle. The values are inclusive, so ul=(0,0) and lr=(1,2)
    # is 2 rows and 3 columns. Columns need to be adjusted slightly
    # depending on quadrant of selection because anchor indexes are
    # measured as cursor positions, that is: positions between the
    # bytes where as rect select needs to think of the selections as
    # on the byte positions themselves, not in between.
    bpr = self.map_width
    r1, c1 = divmod(anchor_start, bpr)
    r2, c2 = divmod(anchor_end, bpr)
    if c1 >= c2:
        # start column is to the right of the end column so columns
        # need to be swapped
        if r1 >= r2:
            # start row is below end row, so rows swapped as well
            c1, c2 = c2, c1 + 1
            r1, r2 = r2, r1
        elif c2 == 0:
            # When the cursor is at the end of a line, anchor_end points
            # to the first character of the next line. Handle this
            # special case by pointing to end of the previous line.
            c2 = bpr
            r2 -= 1
        else:
            c1, c2 = c2 - 1, c1 + 1
    else:
        # start column is to the left of the end column, so don't need
        # to swap columns
        if r1 > r2:
            # start row is below end row
            r1, r2 = r2, r1
            c2 += 1
    anchor_start = r1 * bpr + c1
    anchor_end = r2 * bpr + c2
    # the row is bumped only after anchor_end has been computed
    r2 += 1
    return anchor_start, anchor_end, (r1, c1), (r2, c2)
|
|
|
|
def set_style_ranges_rect(self, ranges, **kwargs):
    """OR the style bits selected by ``kwargs`` into the rectangle of
    bytes described by each (start, end) anchor pair.

    Each pair is normalized with get_rect_indexes; rows are
    ``self.map_width`` bytes apart.
    """
    style_bits = self.get_style_bits(**kwargs)
    style = self.style
    for anchor_start, anchor_end in ranges:
        first, _, (r1, c1), (r2, c2) = self.get_rect_indexes(anchor_start, anchor_end)
        num_rows = r2 - r1
        num_cols = c2 - c1
        # Worked example for 4 rows, 10 columns, width 40 and a first
        # index of 15:
        #   column offsets: 0..9 tiled once per row
        #   row offsets:    0, 40, 80, 120, each repeated once per column
        #   sum + first:    15..24, 55..64, 95..104, 135..144
        col_offsets = np.tile(np.arange(num_cols), num_rows)
        row_offsets = np.repeat(np.arange(num_rows) * self.map_width, num_cols)
        style[col_offsets + row_offsets + first] |= style_bits
|
|
|
|
def rects_to_ranges(self, rects):
|
|
ranges = []
|
|
bpr = self.map_width
|
|
for (r1, c1), (r2, c2) in rects:
|
|
start = r1 * bpr + c1
|
|
end = (r2 - 1) * bpr + c2
|
|
ranges.append((start, end))
|
|
return ranges
|
|
|
|
def clear_style_bits(self, **kwargs):
    """Clear the style bits selected by ``kwargs`` over the whole segment."""
    keep_mask = self.get_style_mask(**kwargs)
    self.style &= keep_mask
|
|
|
|
def set_user_data(self, ranges, user_index, user_data):
    """Record ``user_data`` under ``user_index`` for every index in each
    (start, end) range, keyed by the index into the base array.
    """
    extra = self.rawdata.extra
    for start, end in ranges:
        # FIXME: this is slow
        for local_index in range(start, end):
            extra.user_data[user_index][self.get_raw_index(local_index)] = user_data
|
|
|
|
def get_user_data(self, index, user_index):
    """Return the user data stored under ``user_index`` for ``index``, or
    0 when the lookup raises KeyError.
    """
    rawindex = self.get_raw_index(index)
    try:
        value = self.rawdata.extra.user_data[user_index][rawindex]
    except KeyError:
        value = 0
    return value
|
|
|
|
def get_sorted_user_data(self, user_index):
    """Collapse the user data stored under ``user_index`` into runs.

    Returns a list of ``[[start, end], value]`` items in ascending index
    order.  Each item covers consecutive indexes that hold the same value,
    and ``end`` is one past the last index of the run.
    """
    d = self.rawdata.extra.user_data[user_index]
    indexes = sorted(d.keys())
    ranges = []
    start, end, current = None, None, None
    for i in indexes:
        if start is None:
            # first index: open the first run
            start = i
            current = d[i]
        else:
            # a change of value or a gap in the indexes closes the run
            if d[i] != current or i != end:
                ranges.append([[start, end], current])
                start = i
                current = d[i]
        end = i + 1
    if start is not None:
        # close the final run
        ranges.append([[start, end], current])
    return ranges
|
|
|
|
def get_style_at_indexes(self, indexes):
|
|
return self.style[indexes]
|
|
|
|
def remove_comments_at_indexes(self, indexes):
    """Remove the comment, if any, at each of the given segment indexes."""
    for target in indexes:
        self.remove_comment(target)
|
|
|
|
def set_comments_at_indexes(self, indexes, comments):
    """Store ``comments[n]`` as the comment for segment index
    ``indexes[n]``.  Style bits are not changed.
    """
    stored = self.rawdata.extra.comments
    for where_index, text in zip(indexes, comments):
        stored[self.get_raw_index(where_index)] = text
|
|
|
|
def get_comments_at_indexes(self, indexes):
    """Get the comments at those of the specified indexes whose style has
    the comment bit set.

    Returns a pair: an array of positions within ``indexes`` that have the
    comment bit set, and the list of comment strings for those positions
    (an empty string where no comment text is stored).
    """
    s = self.style[indexes]
    has_comments = np.where(s & comment_bit_mask > 0)[0]
    # get_comment does the raw-index lookup itself; the separate unused
    # lookup that used to precede it has been dropped.
    comments = [self.get_comment(indexes[where_index]) for where_index in has_comments]
    return has_comments, comments
|
|
|
|
def get_comments_in_range(self, start, end):
    """Return a dict mapping segment index to comment for every comment
    whose segment index is at least ``start`` and less than ``end``.
    """
    found = {}

    # Naive way, but maybe it's fast enough: loop over all comments
    # gathering those within the bounds
    for rawindex, comment in self.rawdata.extra.comments.iteritems():
        try:
            index = self.get_index_from_base_index(rawindex)
        except IndexError:
            # comment belongs to a byte outside this segment
            continue
        if start <= index < end:
            found[index] = comment
    return found
|
|
|
|
def get_nonblank_comments_at_indexes(self, indexes):
    """Get a list of comments at specified indexes, but if blank, search
    backward in that comment block to find the first index which should
    have the comment.

    Returns a pair: an array of positions within ``indexes`` whose style
    has the comment bit set, and the list of comments for those positions.
    """
    s = self.style[indexes]
    has_comments = np.where(s & comment_bit_mask > 0)[0]
    comments = []
    for where_index in has_comments:
        # get_comment does the raw-index lookup itself; the separate unused
        # lookup that used to precede it has been dropped.
        comment = self.get_comment(indexes[where_index])
        if not comment:
            # naive method: search backward until we find the comment at
            # the start of the comment block and transfer that to the new
            # block
            start_index = indexes[where_index]
            while start_index > 0:
                last = comment
                start_index -= 1
                comment = self.get_comment(start_index)
                if comment:
                    break
                elif (self.style[start_index] & comment_bit_mask) == 0:
                    # walked off the front of the comment block
                    comment = last
                    break
            if start_index == 0:
                comment = self.get_comment(0)
        comments.append(comment)
    return has_comments, comments
|
|
|
|
def set_comment(self, ranges, text):
    """Set the comment style bit over ``ranges`` and store ``text`` as the
    comment at the first item of each range as given.
    """
    self.set_style_ranges(ranges, comment=True)
    stored = self.rawdata.extra.comments
    for start, _ in ranges:
        stored[self.get_raw_index(start)] = text
|
|
|
|
def get_comment(self, index):
    """Return the comment stored for segment index ``index``, or an empty
    string when there is none.
    """
    stored = self.rawdata.extra.comments
    return stored.get(self.get_raw_index(index), "")
|
|
|
|
def remove_comment(self, index):
    """Delete the comment stored for segment index ``index``.  Nothing
    happens when there is no such comment.
    """
    rawindex = self.get_raw_index(index)
    self.rawdata.extra.comments.pop(rawindex, None)
|
|
|
|
def get_first_comment(self, ranges):
    """Return the comment stored at the smallest start index among
    ``ranges``, or an empty string when there is none.

    ``ranges`` must not be empty.
    """
    # builtin min replaces reduce(min, ...)
    start = min(r[0] for r in ranges)
    rawindex = self.get_raw_index(start)
    return self.rawdata.extra.comments.get(rawindex, "")
|
|
|
|
def clear_comment(self, ranges):
    """Clear the comment style bit over ``ranges`` and delete any comment
    stored at the first item of each range as given.
    """
    self.clear_style_ranges(ranges, comment=True)
    stored = self.rawdata.extra.comments
    for start, _ in ranges:
        stored.pop(self.get_raw_index(start), None)
|
|
|
|
def get_sorted_comments(self):
    """Return all stored comments as a sorted list of [key, comment]
    pairs.
    """
    pairs = [[where, text] for where, text in self.rawdata.extra.comments.iteritems()]
    pairs.sort()
    return pairs
|
|
|
|
def iter_comments_in_segment(self):
    """Yield (segment index, comment) for every stored comment that lies
    inside this segment.

    Comments are keyed by base-array index.  Each key is translated with
    get_index_from_base_index, which raises IndexError for keys outside
    the segment; those comments are skipped.  This replaces a numeric
    range test that asked for the raw index one past the end of the
    segment, which raises IndexError for indexed segments.
    """
    # items() rather than iteritems(): iterate over a snapshot
    for rawindex, comment in self.rawdata.extra.comments.items():
        try:
            index = self.get_index_from_base_index(rawindex)
        except IndexError:
            continue
        yield index, comment
|
|
|
|
def label(self, index, lower_case=True):
|
|
if lower_case:
|
|
return "%04x" % (index + self.start_addr)
|
|
else:
|
|
return "%04X" % (index + self.start_addr)
|
|
|
|
@property
|
|
def search_copy(self):
|
|
if self._search_copy is None:
|
|
self._search_copy = self.data.tostring()
|
|
return self._search_copy
|
|
|
|
def compare_segment(self, other_segment):
    """Mark every position where this segment's data differs from
    ``other_segment``'s with the diff style bit.

    Existing diff bits are cleared first.
    """
    self.clear_style_bits(diff=True)
    diff = self.rawdata.data != other_segment.rawdata.data
    # NOTE(review): this relies on an elementwise ``!=``, which numpy
    # arrays provide -- confirm behaviour for indexed (wrapped) data.
    self.style |= (diff * np.uint8(diff_bit_mask))
|
|
|
|
|
|
class EmptySegment(DefaultSegment):
    """Segment that always reports a length of zero."""
    def __init__(self, rawdata, name="", error=None):
        DefaultSegment.__init__(self, rawdata, 0, name, error)

    def _describe(self, error_template):
        # shared by __str__ and verbose_info, which differ only in how the
        # error text is appended
        text = "%s (empty file)" % (self.name, )
        if self.error:
            text = text + error_template % self.error
        return text

    def __str__(self):
        return self._describe(" %s")

    @property
    def verbose_info(self):
        """Same text as str(), with the error shown as ``error='...'``."""
        return self._describe(" error='%s'")

    def __len__(self):
        return 0
|
|
|
|
|
|
class ObjSegment(DefaultSegment):
    """Segment that additionally records ``metadata_start`` and
    ``data_start``.
    """
    def __init__(self, rawdata, metadata_start, data_start, start_addr, end_addr=0, name="", **kwargs):
        # end_addr is accepted but not stored; the descriptions compute the
        # end from start_addr and the length
        DefaultSegment.__init__(self, rawdata, start_addr, name, **kwargs)
        self.metadata_start = metadata_start
        self.data_start = data_start

    def __str__(self):
        size = len(self)
        last = self.start_addr + size
        text = "%s $%04x-$%04x ($%04x @ $%04x)" % (self.name, self.start_addr, last, size, self.data_start)
        if self.error:
            text = text + " " + self.error
        return text

    @property
    def verbose_info(self):
        """Longer description that prefers ``verbose_name`` over ``name``."""
        size = len(self)
        last = self.start_addr + size
        name = self.verbose_name or self.name
        text = "%s address range: $%04x-$%04x ($%04x bytes), file index of first byte: $%04x" % (name, self.start_addr, last, size, self.data_start)
        if self.error:
            text = text + " error='%s'" % self.error
        return text
|
|
|
|
|
|
class RawSectorsSegment(DefaultSegment):
    """Segment covering a run of sectors, labeled by sector number and byte
    offset within the sector.
    """
    def __init__(self, rawdata, first_sector, num_sectors, count, boot_sector_size, num_boot_sectors, sector_size, **kwargs):
        # count is accepted but not stored
        DefaultSegment.__init__(self, rawdata, 0, **kwargs)
        self.first_sector = first_sector
        self.num_sectors = num_sectors
        self.boot_sector_size = boot_sector_size
        self.num_boot_sectors = num_boot_sectors
        self.page_size = sector_size

    def _sector_text(self, name):
        # "sectors A-B" for several sectors, "sector A" for one
        if self.num_sectors > 1:
            last_sector = self.first_sector + self.num_sectors - 1
            return "%s (sectors %d-%d)" % (name, self.first_sector, last_sector)
        return "%s (sector %d)" % (name, self.first_sector)

    def __str__(self):
        text = self._sector_text(self.name)
        if self.error:
            text = text + " " + self.error
        return text

    @property
    def verbose_info(self):
        """Longer description that prefers ``verbose_name`` and adds the
        size in bytes.
        """
        text = self._sector_text(self.verbose_name or self.name)
        text = text + " $%x bytes" % (len(self), )
        if self.error:
            text = text + " error='%s'" % self.error
        return text

    def label(self, index, lower_case=True):
        """Return ``sNNN:XX`` for ``index``: sector number in decimal, byte
        offset within the sector in hex.

        Indexes below ``num_boot_sectors * boot_sector_size`` are divided
        by the boot sector size; later indexes by ``page_size``.
        """
        boot_size = self.num_boot_sectors * self.boot_sector_size
        if index < boot_size:
            sector, byte = divmod(index, self.boot_sector_size)
        else:
            sector, byte = divmod(index - boot_size, self.page_size)
            sector += self.num_boot_sectors
        template = "s%03d:%02x" if lower_case else "s%03d:%02X"
        return template % (sector + self.first_sector, byte)
|
|
|
|
|
|
class RawTrackSectorSegment(RawSectorsSegment):
    """Sector segment whose labels show track and sector."""
    def label(self, index, lower_case=True):
        """Return ``tTTsSS:XX`` for ``index``.

        The absolute sector number is split into track and sector by
        dividing by 16; the byte offset within the sector is shown in hex.
        """
        boot_size = self.num_boot_sectors * self.boot_sector_size
        if index < boot_size:
            sector, byte = divmod(index, self.boot_sector_size)
        else:
            sector, byte = divmod(index - boot_size, self.page_size)
            sector += self.num_boot_sectors
        track, sector_in_track = divmod(sector + self.first_sector, 16)
        template = "t%02ds%02d:%02x" if lower_case else "t%02ds%02d:%02X"
        return template % (track, sector_in_track, byte)
|
|
|
|
def interleave_indexes(segments, num_bytes):
    """Return a uint32 array of base-array indexes that interleaves the
    given segments ``num_bytes`` at a time.

    The result takes ``num_bytes`` indexes from the first segment, then
    ``num_bytes`` from the second, and so on, cycling through the segments
    until every index has been used.

    Raises ValueError when the segments differ in size or the size is not
    a multiple of ``num_bytes``.
    """
    size = len(segments[0])
    if any(len(seg) != size for seg in segments[1:]):
        raise ValueError("All segments to interleave must be the same size")
    if size % num_bytes != 0:
        raise ValueError("Segment size must be a multiple of the byte interleave")
    num_segments = len(segments)
    stride = num_bytes * num_segments
    interleave = np.empty(size * num_segments, dtype=np.uint32)
    for seg_num, seg in enumerate(segments):
        order = seg.rawdata.get_indexes_from_base()
        first_slot = seg_num * num_bytes
        for offset in range(num_bytes):
            interleave[first_slot + offset::stride] = order[offset::num_bytes]
    return interleave
|
|
|
|
def interleave_segments(segments, num_bytes):
    """Return a new DefaultSegment, starting at address 0, that views the
    given segments interleaved ``num_bytes`` at a time.

    All segments must share the same base data and style arrays; otherwise
    ValueError is raised.  The new segment uses the first segment's
    ``extra``.
    """
    new_index = interleave_indexes(segments, num_bytes)
    first_raw = segments[0].rawdata
    data_base, style_base = first_raw.get_bases()
    for other in segments[1:]:
        other_data, other_style = other.rawdata.get_bases()
        if other_data is not data_base or other_style is not style_base:
            raise ValueError("Can't interleave segments with different base arrays")
    raw = SegmentData(data_base, style_base, first_raw.extra, order=new_index)
    return DefaultSegment(raw, 0)
|
|
|
|
|
|
class SegmentList(list):
    """List of segments with a helper for building them from raw data."""
    def add_segment(self, data, start_addr=0, name=None):
        """Wrap ``data`` in a DefaultSegment, append it and return it.

        When ``name`` is None, a name is generated from the start address,
        the end address and the size, all in hex.
        """
        size = len(data)
        if name is None:
            name = "%04x - %04x, size=%04x" % (start_addr, start_addr + size, size)
        segment = DefaultSegment(SegmentData(data), start_addr, name)
        self.append(segment)
        return segment
|