# vim: set tabstop=4 shiftwidth=4 noexpandtab filetype=python: # Copyright (C) 2017 T. Joseph Carter # # This program is free software; you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by the # Free Software Foundation; either version 2 of the License, or (at your # option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License # for more details. # # You should have received a copy of the GNU General Public License along # with this program; if not, write to the Free Software Foundation, Inc., # 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA """ Utility functions for blocksfree. This module serves as a catch-all generally for functions that don't seem to belong somewhere else. """ from typing import Callable, Iterator, Sequence def seqsplit(seq: Sequence, num: int) -> Iterator[Sequence]: """Return a sequence in num-sized pieces. Args: seq: A sequence type (bytes, list, etc.) num: The maximum length of sequence desired Yields: The next num items from seq or as many as remain. The last sequence we yield may be shorter than mum elements. """ for i in range(0, len(seq), num): yield seq[i:i + num] def hexchars(line: bytes) -> str: """Return a canonical byte hexdump string of byte values. NB: This function will be memory intensive if called on large objects. It is actually intended to be called on at most 16 bytes at a time to produce a data for a single line of a canonical hexdump. Args: line: a bytes-like object to be dumped Returns: A string containing a canonical-style hex dump of byte values with space delimiters in groups of eight. The format has this pattern: ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ##... The string will not be padded to a fixed length. """ vals = [format(b, '02x') for b in line] return ' '.join(' '.join(part) for part in seqsplit(vals, 8)) def printables(line: bytes, mask_high: bool = False) -> str: r"""Return ASCII printable string from bytes for hexdump. Args: line: The bytes to convert to ASCII mask_high: True to mask away high bit before testing printability Returns: String of printable ASCII characters equal to the number of bytes in line. All non-printable characters will be replaced by a dot (.) in the style of a canonical hexdump. If mask_high is True, the high bit of each character will be ignored. In that case b'\x41' and b'\xc1' will both produce 'A'. """ ascii_ = [] for char in line: if mask_high: char = char & 0x7f ascii_.append(chr(char) if 0x20 <= char < 0x7f else '.') return ''.join(ascii_) def hexdump_gen( buf: bytes, verbose: bool = False, mask_high: bool = False ) -> Iterator[str]: """Yield lines of a hexdump of a bytes-like object. Args: buf: A bytes-like object to be hexdumped verbose: Include full output rather than collapsing duplicated lines mask_high: Strip high bit of each byte for testing printable characters (see printables() above) Yields: For a zero-length buf, nothing. With verbose, lines of at most 16 bytes in the format: ' ||' The last line provides the total number of bytes in the buffer: '' With not verbose, repeated lines whose data is the same as the line just printed are compressed. Unique lines are as before: ' ||' All lines whose bytes are identical to the previously printed line will be compressed to a line containing an asterisk: '*' The last line provides the total number of bytes in the buffer: '' The format is 8 hex digits with no prefix. Individual bytes in are 2 hex digits with no prefix, see hexchars() for the precise format. The format is described for the function printables() and is bracketed by two pipe (|) characters as shown here. """ buf = memoryview(buf) last = None outstar = True i = 0 for i, line in enumerate(seqsplit(buf, 16)): if not verbose and line == last: if outstar: outstar = False # Ensure we yield only one star yield '*' else: last = line outstar = True # This line is not a star yield "{:07x}0 {:48} |{:16}|".format( i, hexchars(line), printables(line, mask_high)) if last is not None: yield format(i * 16 + len(last), '08x') def hexdump( buf: bytes, verbose: bool = False, mask_high: bool = False, func: Callable[[str], None] = print ) -> None: """Perform func for each line of a hexdump of buf. Exists as a means to temporarily dump binary data to stdout to assist with debugging stubborn code. For other uses, it probably makes more sense to call hexdump_gen() directly. Do use this function for other purposes, set func to something other than its default of the print function. func() will be passed each line of output as generated by hexdump_gen(). This is memory-efficient for sizable buffers, but it is not fast as func is effectively called for each sixteen bytes of buf. See hexdump_gen for a more information about the other arguments to this function and the format of the strings func will receive. Args: buf: the bytes-like object to be hexdumped verbose: True if we should not compress duplicate output mask_high: True high bit should be stripped for ASCII printability func: The function to be called with each line from hexdump_gen """ for line in hexdump_gen(buf, verbose, mask_high): func(line) gen_hexdump = hexdump_gen # TODO(tjcarter): Fix blocksfree.buffer and remove