mirror of
https://github.com/robmcmullen/atrcopy.git
synced 2024-12-01 09:49:42 +00:00
Added faster comment splitting
* added functions to copy a segment & its base so comments can be marked on the copy of the base and show up in the copy of the segment
This commit is contained in:
parent
afa9c9786a
commit
f1b0f5ebac
@ -157,6 +157,14 @@ class SegmentData(object):
|
|||||||
buf = cStringIO.StringIO(self.data[:])
|
buf = cStringIO.StringIO(self.data[:])
|
||||||
return buf
|
return buf
|
||||||
|
|
||||||
|
@property
def data_base(self):
    """Return the ``base`` of ``self.data``, or ``self.data`` when it has none.

    ``self.data.base`` is used when it is not ``None``; otherwise the data
    object itself is handed back.
    """
    parent = self.data.base
    if parent is None:
        return self.data
    return parent
|
||||||
|
|
||||||
|
@property
def style_base(self):
    """Return the ``base`` of ``self.style``, or ``self.style`` when it has none.

    ``self.style.base`` is used when it is not ``None``; otherwise the style
    object itself is handed back.
    """
    parent = self.style.base
    if parent is None:
        return self.style
    return parent
|
||||||
|
|
||||||
def get_data(self):
|
def get_data(self):
|
||||||
return self.data
|
return self.data
|
||||||
|
|
||||||
@ -227,10 +235,17 @@ class SegmentData(object):
|
|||||||
d = self.data.np_data.copy()
|
d = self.data.np_data.copy()
|
||||||
s = self.style.np_data.copy()
|
s = self.style.np_data.copy()
|
||||||
copy = SegmentData(d, s, order=self.order)
|
copy = SegmentData(d, s, order=self.order)
|
||||||
else:
|
elif self.data.base is None:
|
||||||
|
# if there is no base array, we aren't looking at a slice so we
|
||||||
|
# must be copying the entire array.
|
||||||
d = self.data.copy()
|
d = self.data.copy()
|
||||||
s = self.style.copy()
|
s = self.style.copy()
|
||||||
|
copy = SegmentData(d, s)
|
||||||
|
else:
|
||||||
|
d = self.data.base.copy()
|
||||||
|
s = self.style.base.copy()
|
||||||
start, end = self.byte_bounds_offset()
|
start, end = self.byte_bounds_offset()
|
||||||
|
print "copy: start, end =", start, end
|
||||||
copy = SegmentData(d[start:end], s[start:end])
|
copy = SegmentData(d[start:end], s[start:end])
|
||||||
return copy
|
return copy
|
||||||
|
|
||||||
@ -480,6 +495,17 @@ class DefaultSegment(object):
|
|||||||
matches = (self.style & style_bits) == style_bits
|
matches = (self.style & style_bits) == style_bits
|
||||||
return self.bool_to_ranges(matches)
|
return self.bool_to_ranges(matches)
|
||||||
|
|
||||||
|
def get_comment_locations(self, **kwargs):
    """Return a style array with comment locations flagged.

    The work is done on a copy of ``self.rawdata`` so this segment's own
    style is not modified.  The copy's base style array is masked with the
    bits returned by ``self.get_style_bits(**kwargs)``, then
    ``comment_bit_mask`` is OR-ed in at every index that is a key of
    ``self.rawdata.extra.comments``.

    :param kwargs: passed unchanged to ``get_style_bits``
    :returns: the ``style`` attribute of the copied raw data
    """
    style_bits = self.get_style_bits(**kwargs)
    r = self.rawdata.copy()

    # Operate on the base array *in place*.  ``r.style_base & style_bits``
    # allocates a new temporary array, so neither the masking nor the
    # comment marks would ever reach ``r.style``.
    # NOTE(review): relies on r.style sharing memory with r.style_base, as
    # set up by the segment copy -- confirm for ordered (indexed) segments.
    base = r.style_base
    base &= style_bits

    # Comment keys index into the base array; list() keeps this working
    # whether keys() returns a list or a view.
    comment_indexes = np.asarray(list(self.rawdata.extra.comments.keys()), dtype=np.uint32)
    base[comment_indexes] |= comment_bit_mask
    return r.style
|
||||||
|
|
||||||
def get_entire_style_ranges(self, split_comments=None, **kwargs):
|
def get_entire_style_ranges(self, split_comments=None, **kwargs):
|
||||||
"""Find sections of the segment that have the same style value.
|
"""Find sections of the segment that have the same style value.
|
||||||
|
|
||||||
@ -492,32 +518,41 @@ class DefaultSegment(object):
|
|||||||
tuple; and an integer with the style value.
|
tuple; and an integer with the style value.
|
||||||
"""
|
"""
|
||||||
style_bits = self.get_style_bits(**kwargs)
|
style_bits = self.get_style_bits(**kwargs)
|
||||||
matches = self.style & style_bits
|
matches = self.get_comment_locations(**kwargs)
|
||||||
if split_comments is None:
|
|
||||||
split_comments = set()
|
|
||||||
else:
|
|
||||||
split_comments = set(split_comments)
|
|
||||||
groups = np.split(matches, np.where(np.diff(matches) != 0)[0] + 1)
|
groups = np.split(matches, np.where(np.diff(matches) != 0)[0] + 1)
|
||||||
|
# print groups
|
||||||
# split into groups with the same numbers
|
# split into groups with the same numbers
|
||||||
ranges = []
|
ranges = []
|
||||||
last_end = 0
|
last_end = 0
|
||||||
if len(groups) == 1 and len(groups[0]) == 0:
|
if len(groups) == 1 and len(groups[0]) == 0:
|
||||||
# check for degenerate case
|
# check for degenerate case
|
||||||
return
|
return
|
||||||
|
last_style = -1
|
||||||
for group in groups:
|
for group in groups:
|
||||||
next_end = last_end + len(group)
|
# each group is guaranteed to have the same style
|
||||||
|
size = len(group)
|
||||||
|
next_end = last_end + size
|
||||||
style = matches[last_end]
|
style = matches[last_end]
|
||||||
if style in split_comments:
|
masked_style = style & style_bits
|
||||||
comment_list = self.get_comments_in_range(last_end, next_end)
|
# print last_end, next_end, style, masked_style, size, group
|
||||||
for index in sorted(comment_list.keys()):
|
if style & comment_bit_mask:
|
||||||
if last_end == index:
|
if masked_style in split_comments:
|
||||||
# skip if the comment is at the start point because it
|
# print "interesting comment", last_end, next_end
|
||||||
# will always be split at the start point
|
ranges.append(((last_end, next_end), masked_style))
|
||||||
continue
|
else:
|
||||||
ranges.append(((last_end, index), style))
|
# print "non-interesting comment", last_end, next_end
|
||||||
last_end = index
|
if last_style == masked_style:
|
||||||
if last_end < next_end:
|
((prev_end, _), _) = ranges.pop()
|
||||||
ranges.append(((last_end, next_end), style))
|
ranges.append(((prev_end, next_end), masked_style))
|
||||||
|
else:
|
||||||
|
ranges.append(((last_end, next_end), masked_style))
|
||||||
|
else:
|
||||||
|
if last_style == masked_style:
|
||||||
|
((prev_end, _), _) = ranges.pop()
|
||||||
|
ranges.append(((prev_end, next_end), masked_style))
|
||||||
|
else:
|
||||||
|
ranges.append(((last_end, next_end), masked_style))
|
||||||
|
last_style = masked_style
|
||||||
last_end = next_end
|
last_end = next_end
|
||||||
return ranges
|
return ranges
|
||||||
|
|
||||||
|
@ -3,7 +3,7 @@ import os
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from atrcopy import DefaultSegment, SegmentData, get_xex, interleave_segments
|
from atrcopy import DefaultSegment, SegmentData, get_xex, interleave_segments, user_bit_mask
|
||||||
|
|
||||||
|
|
||||||
def get_indexed(segment, num, scale):
|
def get_indexed(segment, num, scale):
|
||||||
@ -181,6 +181,87 @@ class TestIndexed(object):
|
|||||||
assert not np.all((c.data[:] - s.data[:]) == 0)
|
assert not np.all((c.data[:] - s.data[:]) == 0)
|
||||||
|
|
||||||
|
|
||||||
|
class TestComments(object):
    """Tests for comment marking and comment-aware style range splitting.

    ``setup`` builds a 4000-byte segment and a sub-segment over the slice
    ``[2:202]`` of the same raw data, so index ``i`` in the sub-segment is
    expected to line up with index ``i + 2`` in the full segment.
    """

    def setup(self):
        data = np.ones([4000], dtype=np.uint8)
        r = SegmentData(data)
        self.segment = DefaultSegment(r, 0)
        self.sub_segment = DefaultSegment(r[2:202], 2)

    def _check_sub_segment_locations(self):
        """Compare comment locations of the sub-segment against the full one.

        The sub-segment starts 2 bytes into the full segment, so positions 4
        and 40 in the full segment correspond to 2 and 38 in the sub-segment.
        Returns the sub-segment for further checks.
        """
        s2 = self.sub_segment
        copy = s2.get_comment_locations()
        orig = self.segment.get_comment_locations()
        assert copy[2] == orig[4]
        assert copy[38] == orig[40]
        return s2

    def test_locations(self):
        s = self.segment
        s.set_comment([[4, 5]], "test1")
        s.set_comment([[40, 50]], "test2")
        s.set_style_ranges([[2, 100]], comment=True)
        s.set_style_ranges([[200, 299]], data=True)
        for i in range(1, 4):
            for j in range(1, 4):
                # create some with overlapping regions, some without
                r = [500*j, 500*j + 200*i + 200]
                s.set_style_ranges([r], user=i)
                s.set_user_data([r], i, i*10 + j)
        for r in ([100, 200], [3100, 3200]):
            s.set_style_ranges([r], user=4)
            s.set_user_data([r], 4, 99)

        self._check_sub_segment_locations()

    def test_split_data_at_comment(self):
        s = self.segment
        s.set_style_ranges([[0, 1000]], data=True)
        # one single-byte comment every 25 bytes across the whole segment
        for i in range(0, len(s), 25):
            s.set_comment([[i, i+1]], "comment at %d" % i)

        s2 = self._check_sub_segment_locations()

        r = s2.get_entire_style_ranges([1], user=True)
        assert r == [((0, 23), 1), ((23, 48), 1), ((48, 73), 1), ((73, 98), 1), ((98, 123), 1), ((123, 148), 1), ((148, 173), 1), ((173, 198), 1), ((198, 200), 1)]

    def test_split_data_at_comment2(self):
        s = self.segment
        # consecutive 40-byte runs cycling through user styles 0..7
        start = 0
        i = 0
        for end in range(40, 1000, 40):
            s.set_style_ranges([[start, end]], user=i)
            start = end
            i = (i + 1) % 8
        # one single-byte comment every 25 bytes across the whole segment
        for i in range(0, len(s), 25):
            s.set_comment([[i, i+1]], "comment at %d" % i)

        s2 = self._check_sub_segment_locations()

        r = s2.get_entire_style_ranges([1], user=user_bit_mask)
        assert r == [((0, 38), 0), ((38, 48), 1), ((48, 73), 1), ((73, 78), 1), ((78, 118), 2), ((118, 158), 3), ((158, 198), 4), ((198, 200), 5)]
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
t = TestIndexed()
|
t = TestIndexed()
|
||||||
t.setup()
|
t.setup()
|
||||||
@ -191,3 +272,6 @@ if __name__ == "__main__":
|
|||||||
t.setup()
|
t.setup()
|
||||||
t.test_xex()
|
t.test_xex()
|
||||||
t.test_copy()
|
t.test_copy()
|
||||||
|
t = TestComments()
|
||||||
|
t.setup()
|
||||||
|
t.test_split_data_at_comment()
|
||||||
|
Loading…
Reference in New Issue
Block a user