mirror of
https://github.com/iKarith/cppo-ng.git
synced 2024-06-01 21:41:34 +00:00
db6c481ad6
First, docstrings have been haphazard and inconsistent in blocksfree; that's going to change. I don't know rST markup, but I do intend to learn it because it is a superior format for technical writing. User-focused docs will remain in Markdown format. They're more likely to be read by end-users as text, after all, and those sorts of docs are the things Markdown is good for. Rewrote printables() in procedural fashion for clarity. Would like to have done that with hexchars(), but that's not actually much clearer when written procedurally than functionally, so I let it be. Functions now have type hints again. Those went away when I rewrote this mess and I didn't put them back. Finally, I renamed the Iterator version of this function to hexdump_gen. pylint3 objects to the workaround to mixing commits. Temporary.
168 lines
5.6 KiB
Python
168 lines
5.6 KiB
Python
# vim: set tabstop=4 shiftwidth=4 noexpandtab filetype=python:
|
|
|
|
# Copyright (C) 2017 T. Joseph Carter
|
|
#
|
|
# This program is free software; you can redistribute it and/or modify it
|
|
# under the terms of the GNU General Public License as published by the
|
|
# Free Software Foundation; either version 2 of the License, or (at your
|
|
# option) any later version.
|
|
#
|
|
# This program is distributed in the hope that it will be useful, but
|
|
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
|
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
# for more details.
|
|
#
|
|
# You should have received a copy of the GNU General Public License along
|
|
# with this program; if not, write to the Free Software Foundation, Inc.,
|
|
# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
"""
|
|
Utility functions for blocksfree.
|
|
|
|
This module serves as a catch-all generally for functions that don't seem to
|
|
belong somewhere else.
|
|
"""
|
|
|
|
from typing import Callable, Iterator, Sequence
|
|
|
|
def seqsplit(seq: Sequence, num: int) -> Iterator[Sequence]:
    """Yield consecutive slices of a sequence, each at most num items long.

    Args:
        seq: A sliceable sequence with a length (bytes, list, etc.)
        num: The maximum number of items in each yielded slice

    Yields:
        Successive slices of seq taken num items at a time, in order.  The
        final slice holds whatever remains and so may be shorter than num
        items.  Nothing is yielded for an empty seq.
    """
    # Every slice begins on a multiple of num; slicing past the end of seq
    # simply truncates, which is what produces the short final piece.
    starts = range(0, len(seq), num)
    yield from (seq[start:start + num] for start in starts)
|
|
|
|
def hexchars(line: bytes) -> str:
    """Return a canonical byte hexdump string of byte values.

    NB: This function will be memory intensive if called on large objects.  It
    is actually intended to be called on at most 16 bytes at a time to produce
    the data for a single line of a canonical hexdump.

    Args:
        line: a bytes-like object to be dumped

    Returns:
        A string containing a canonical-style hex dump of byte values.  Each
        byte is two lowercase hex digits; bytes are separated by one space
        and every group of eight bytes is separated from the next by two
        spaces.  The format has this pattern:

            ## ## ## ## ## ## ## ##  ## ## ## ## ## ## ## ##  ## ## ##...

        Sixteen bytes therefore produce exactly 48 characters.  The string
        will not be padded to a fixed length.
    """
    vals = [format(b, '02x') for b in line]
    # One space between bytes inside a group of eight ...
    groups = (' '.join(vals[i:i + 8]) for i in range(0, len(vals), 8))
    # ... and two spaces between groups, otherwise the grouping is invisible
    # and a full 16-byte line comes out one column short of 48 characters.
    return '  '.join(groups)
|
|
|
|
def printables(line: bytes, mask_high: bool = False) -> str:
    r"""Render bytes as the ASCII column of a canonical hexdump.

    Args:
        line: The bytes to convert to ASCII
        mask_high: True to clear the high bit of each byte before deciding
            whether it is printable

    Returns:
        A string with exactly one character per byte in line.  Bytes in the
        range 0x20 through 0x7e are shown as their ASCII character; every
        other byte is shown as a dot (.) in the style of a canonical hexdump.

        When mask_high is True the test (and the character produced) uses the
        byte with its high bit cleared, so b'\x41' and b'\xc1' both give 'A'.
    """
    codes = (byte & 0x7f for byte in line) if mask_high else line
    return ''.join(
        chr(code) if 0x20 <= code < 0x7f else '.'
        for code in codes
    )
|
|
|
|
|
|
def hexdump_gen(
        buf: bytes,
        verbose: bool = False,
        mask_high: bool = False
) -> Iterator[str]:
    """Generate the lines of a hexdump of a bytes-like object.

    Args:
        buf: A bytes-like object to be hexdumped
        verbose: Emit every line instead of collapsing runs of duplicates
        mask_high: Strip the high bit of each byte when testing for printable
            characters (passed through to printables())

    Yields:
        Nothing at all for a zero-length buf.

        Otherwise buf is walked sixteen bytes at a time and each chunk
        normally produces one line of the form:
            '<offset> <hex bytes> |<ASCII bytes>|'

        Unless verbose is set, a chunk whose bytes equal those of the most
        recently printed data line is not printed again.  The first such
        chunk in a run produces a line holding a single asterisk:
            '*'
        and the remaining chunks of that run produce nothing.

        The final line is always the total number of bytes in the buffer:
            '<offset>'

        <offset> is 8 hex digits with no prefix.  <hex bytes> comes from
        hexchars() and is padded to 48 columns; <ASCII bytes> comes from
        printables() and is padded to 16 columns between the two pipe (|)
        characters.
    """
    view = memoryview(buf)
    previous = None         # Most recent chunk that was printed in full
    star_pending = True     # False once '*' was emitted for the current run
    index = 0

    for index, chunk in enumerate(seqsplit(view, 16)):
        if verbose or chunk != previous:
            previous = chunk
            star_pending = True
            # Chunk offsets are multiples of 16, so the 8-digit offset is the
            # chunk index in 7 hex digits followed by a literal '0'.
            yield "{:07x}0 {:48} |{:16}|".format(
                index, hexchars(chunk), printables(chunk, mask_high))
            continue

        # Duplicate of the last printed chunk: mark the run only once.
        if star_pending:
            star_pending = False
            yield '*'

    # previous stays None only when the loop never ran, i.e. buf was empty.
    if previous is not None:
        yield format(index * 16 + len(previous), '08x')
|
|
|
|
def hexdump(
        buf: bytes,
        verbose: bool = False,
        mask_high: bool = False,
        func: Callable[[str], None] = print
) -> None:
    """Call func once for each line of a hexdump of buf.

    Exists as a means to temporarily dump binary data to stdout to assist with
    debugging stubborn code.  For other uses, it probably makes more sense to
    call hexdump_gen() directly.  To use this function for other purposes, set
    func to something other than its default of the print function.  func()
    will be passed each line of output as generated by hexdump_gen().  This is
    memory-efficient for sizable buffers, but it is not fast, as func is
    effectively called for each sixteen bytes of buf.

    See hexdump_gen for more information about the other arguments to this
    function and the format of the strings func will receive.

    Args:
        buf: The bytes-like object to be hexdumped
        verbose: True if we should not compress duplicate output
        mask_high: True if the high bit should be stripped for ASCII
            printability
        func: The function to be called with each line from hexdump_gen
    """
    lines = hexdump_gen(buf, verbose=verbose, mask_high=mask_high)
    for text in lines:
        func(text)
|
|
|
|
# Alias keeping the name gen_hexdump bound to hexdump_gen; per the TODO below
# it is meant to go away once blocksfree.buffer has been fixed.
gen_hexdump = hexdump_gen  # TODO(tjcarter): Fix blocksfree.buffer and remove
|