mirror of
https://github.com/JeffJetton/apple1-worple.git
synced 2024-11-26 19:49:19 +00:00
103 lines
3.0 KiB
Python
103 lines
3.0 KiB
Python
|
|
# Read in file of five-letter words and encode them into two groups
|
|
# of three-byte packed binary.
|
|
#
|
|
# Each letter is converted to a five-bit value, 0-25.
|
|
# The bits are put together into 25 bits, which would normally take
# up four bytes of storage. Since bit 0 of the MSB (the left-most bit
# of our 25 bits) can only be 0 or 1, we'll just split the data
# into two groups and only store the least-significant 24 bits.
|
|
#
|
|
# In other words, if the first letter is A-P (0-15), we can store that
|
|
# first letter in only four bits. Remaining letters are five bits each
|
|
# regardless, for a total of 24 bits (three bytes).
|
|
# If the first letter is Q-Z (16-25), we'll still store it in four bits,
# but in a separate list that we'll keep in a specific section of
# memory. We'll just need to be aware that words from that part of the
# list will need to have that bit put back on (i.e., have that first
# letter shifted up the alphabet by 16 letters)
|
|
|
|
import random
|
|
|
|
INFILE = 'words.txt'
|
|
OUTFILE_0 = 'words_0.bin'
|
|
OUTFILE_1 = 'words_1.bin'
|
|
|
|
|
|
def pack(word):
    """Pack a five-letter word into three bytes.

    The word is stripped of surrounding whitespace and upper-cased. Each
    letter is then converted to a five-bit value (A=0 ... Z=25) and the
    five values are concatenated, first letter most significant, into a
    25-bit integer. Only the least-significant 24 bits are stored; the
    25th bit (set when the first letter's value is 16 or more, i.e. Q-Z)
    is reported separately as an overflow flag.

    Returns a two-item list: the packed value as a three-byte big-endian
    bytes object, followed by the overflow flag (1 or 0).

    Raises ValueError if the cleaned-up word is not exactly five
    characters long or contains a character outside A-Z.
    """
    word = word.strip().upper()
    if len(word) != 5:
        raise ValueError(f'"{word}" is not five letters long')

    b = 0
    for letter in word:
        # Convert letter to 0-25 integer
        c = ord(letter) - ord('A')
        if (c < 0) or (c > 25):
            raise ValueError('Character out of range A-Z in word ' + word)
        # Scoot current contents of b over by five bits, then add the new
        # character (c always fits in the five bits just vacated)
        b = (b << 5) | c

    # Bit 24 is the one bit that does not fit in three bytes
    overflow = (b >> 24) & 1
    # Keep only the least-significant three bytes
    b &= 0xFFFFFF

    # Return as a three-byte bytes object plus the flag
    return [b.to_bytes(3, 'big'), overflow]
|
|
|
|
|
|
def head_and_tail(list, size=6):
    """Print the first and last `size` items of a sequence.

    Two lines are printed: 'Head: ' followed by the first `size` items,
    and 'Tail: ' followed by the last `size` items. If the sequence has
    fewer than `size` items, both lines show the whole sequence.

    The parameter name `list` shadows the builtin inside this function;
    it is kept as-is so existing keyword callers continue to work.

    Returns None.
    """
    head = list[0:size]
    # list[-0:] is the same as list[0:] (the whole sequence), so a size of
    # zero needs an explicit empty slice to stay consistent with the head.
    tail = list[-size:] if size else list[:0]
    print('Head: ' + str(head))
    print('Tail: ' + str(tail))
|
|
|
|
|
|
##############################################################################
|
|
|
|
# Load every line of the input file (line endings are still attached here)
print('\nReading ' + INFILE)
with open(INFILE, 'r') as infile:
    words = list(infile)
print(f'Words read: {len(words)}')
head_and_tail(words)
print()

# Randomize the order; the seed is fixed before shuffling
print('Shuffling...')
random.seed(24601)
random.shuffle(words)
head_and_tail(words)
print()

# Pack each word and file it under its overflow flag (0 or 1).
# pack() takes care of stripping whitespace and upper-casing.
print('Packing words...')
packed_list = [[], []]
for word in words:
    data, flag = pack(word)
    packed_list[flag].append(data)
for flag, group in enumerate(packed_list):
    print(f'Words with first bit = {flag}: {len(group)}')
print()

# Write each group to its own binary file, first group first
print('Saving word lists...')
for path, group in zip((OUTFILE_0, OUTFILE_1), packed_list):
    with open(path, 'wb') as outfile:
        outfile.write(b''.join(group))
print('Done\n')
|
|
|
|
|
|
|
|
|
|
|