mirror of
https://github.com/robmcmullen/atrcopy.git
synced 2025-01-16 02:32:52 +00:00
Added splitting-on-comments when using get_entire_style_ranges, refs: https://github.com/robmcmullen/omnivore/issues/117
This commit is contained in:
parent
5f9acaa802
commit
476f0cd568
@ -464,7 +464,7 @@ class DefaultSegment(object):
|
|||||||
matches = (self.style & style_bits) == style_bits
|
matches = (self.style & style_bits) == style_bits
|
||||||
return self.bool_to_ranges(matches)
|
return self.bool_to_ranges(matches)
|
||||||
|
|
||||||
def get_entire_style_ranges(self, **kwargs):
|
def get_entire_style_ranges(self, split_comments=None, **kwargs):
|
||||||
"""Find sections of the segment that have the same style value.
|
"""Find sections of the segment that have the same style value.
|
||||||
|
|
||||||
The arguments to this function are used as a mask for the style to
|
The arguments to this function are used as a mask for the style to
|
||||||
@ -477,6 +477,10 @@ class DefaultSegment(object):
|
|||||||
"""
|
"""
|
||||||
style_bits = self.get_style_bits(**kwargs)
|
style_bits = self.get_style_bits(**kwargs)
|
||||||
matches = self.style & style_bits
|
matches = self.style & style_bits
|
||||||
|
if split_comments is None:
|
||||||
|
split_comments = set()
|
||||||
|
else:
|
||||||
|
split_comments = set(split_comments)
|
||||||
groups = np.split(matches, np.where(np.diff(matches) != 0)[0] + 1)
|
groups = np.split(matches, np.where(np.diff(matches) != 0)[0] + 1)
|
||||||
# split into groups with the same numbers
|
# split into groups with the same numbers
|
||||||
ranges = []
|
ranges = []
|
||||||
@ -486,7 +490,18 @@ class DefaultSegment(object):
|
|||||||
return
|
return
|
||||||
for group in groups:
|
for group in groups:
|
||||||
next_end = last_end + len(group)
|
next_end = last_end + len(group)
|
||||||
ranges.append(((last_end, next_end), matches[last_end]))
|
style = matches[last_end]
|
||||||
|
if style in split_comments:
|
||||||
|
comment_list = self.get_comments_in_range(last_end, next_end)
|
||||||
|
for index in sorted(comment_list.keys()):
|
||||||
|
if last_end == index:
|
||||||
|
# skip if the comment is at the start point because it
|
||||||
|
# will always be split at the start point
|
||||||
|
continue
|
||||||
|
ranges.append(((last_end, index), style))
|
||||||
|
last_end = index
|
||||||
|
if last_end < next_end:
|
||||||
|
ranges.append(((last_end, next_end), style))
|
||||||
last_end = next_end
|
last_end = next_end
|
||||||
return ranges
|
return ranges
|
||||||
|
|
||||||
@ -655,6 +670,21 @@ class DefaultSegment(object):
|
|||||||
comments.append(comment)
|
comments.append(comment)
|
||||||
return has_comments, comments
|
return has_comments, comments
|
||||||
|
|
||||||
|
def get_comments_in_range(self, start, end):
    """Return the comments whose segment index lies in ``[start, end)``.

    Every entry of ``self.rawdata.extra.comments`` is examined; its key is
    translated to an index in this segment with
    ``self.get_index_from_base_index``.  Entries for which that translation
    raises ``IndexError`` are skipped.

    :param start: first segment index to include
    :param end: segment index one past the last index to include
    :returns: dict mapping segment index to the comment stored for it;
        empty if no comment falls inside the range
    """
    comments = {}

    # Naive way, but maybe it's fast enough: loop over all comments
    # gathering those within the bounds.
    #
    # ``items()`` rather than ``iteritems()``: the latter only exists on
    # Python 2 mappings, while ``items()`` works on both Python 2 and 3.
    # NOTE(review): assumes ``comments`` is a dict-like mapping keyed by
    # the raw (base) index -- confirm against where it is populated.
    for rawindex, comment in self.rawdata.extra.comments.items():
        try:
            index = self.get_index_from_base_index(rawindex)
        except IndexError:
            # the raw index could not be translated into this segment
            continue
        if start <= index < end:
            comments[index] = comment
    return comments
|
||||||
|
|
||||||
def get_nonblank_comments_at_indexes(self, indexes):
|
def get_nonblank_comments_at_indexes(self, indexes):
|
||||||
"""Get a list of comments at specified indexes, but if blank, search
|
"""Get a list of comments at specified indexes, but if blank, search
|
||||||
backward in that comment block to find the first index which should
|
backward in that comment block to find the first index which should
|
||||||
|
Loading…
x
Reference in New Issue
Block a user