mirror of
https://github.com/robmcmullen/atrcopy.git
synced 2024-12-28 15:29:33 +00:00
Added splitting-on-comments when using get_entire_style_ranges, refs: https://github.com/robmcmullen/omnivore/issues/117
This commit is contained in:
parent
5f9acaa802
commit
476f0cd568
@ -464,7 +464,7 @@ class DefaultSegment(object):
|
||||
matches = (self.style & style_bits) == style_bits
|
||||
return self.bool_to_ranges(matches)
|
||||
|
||||
def get_entire_style_ranges(self, **kwargs):
|
||||
def get_entire_style_ranges(self, split_comments=None, **kwargs):
|
||||
"""Find sections of the segment that have the same style value.
|
||||
|
||||
The arguments to this function are used as a mask for the style to
|
||||
@ -477,6 +477,10 @@ class DefaultSegment(object):
|
||||
"""
|
||||
style_bits = self.get_style_bits(**kwargs)
|
||||
matches = self.style & style_bits
|
||||
if split_comments is None:
|
||||
split_comments = set()
|
||||
else:
|
||||
split_comments = set(split_comments)
|
||||
groups = np.split(matches, np.where(np.diff(matches) != 0)[0] + 1)
|
||||
# split into groups with the same numbers
|
||||
ranges = []
|
||||
@ -486,7 +490,18 @@ class DefaultSegment(object):
|
||||
return
|
||||
for group in groups:
|
||||
next_end = last_end + len(group)
|
||||
ranges.append(((last_end, next_end), matches[last_end]))
|
||||
style = matches[last_end]
|
||||
if style in split_comments:
|
||||
comment_list = self.get_comments_in_range(last_end, next_end)
|
||||
for index in sorted(comment_list.keys()):
|
||||
if last_end == index:
|
||||
# skip if the comment is at the start point because it
|
||||
# will always be split at the start point
|
||||
continue
|
||||
ranges.append(((last_end, index), style))
|
||||
last_end = index
|
||||
if last_end < next_end:
|
||||
ranges.append(((last_end, next_end), style))
|
||||
last_end = next_end
|
||||
return ranges
|
||||
|
||||
@ -655,6 +670,21 @@ class DefaultSegment(object):
|
||||
comments.append(comment)
|
||||
return has_comments, comments
|
||||
|
||||
def get_comments_in_range(self, start, end):
|
||||
"""Get a list of comments at specified indexes"""
|
||||
comments = {}
|
||||
|
||||
# Naive way, but maybe it's fast enough: loop over all comments
|
||||
# gathering those within the bounds
|
||||
for rawindex, comment in self.rawdata.extra.comments.iteritems():
|
||||
try:
|
||||
index = self.get_index_from_base_index(rawindex)
|
||||
except IndexError:
|
||||
continue
|
||||
if index >= start and index < end:
|
||||
comments[index] = comment
|
||||
return comments
|
||||
|
||||
def get_nonblank_comments_at_indexes(self, indexes):
|
||||
"""Get a list of comments at specified indexes, but if blank, search
|
||||
backward in that comment block to find the first index which should
|
||||
|
Loading…
Reference in New Issue
Block a user