#!/usr/bin/python3
#
# Replace blocks of text in an HTML document with the contents of a file.
#
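# For example:
#
#   <!-- START: /incl-nav.html active:#nav-home -->
#   <ul> ... previously inserted contents of incl-nav.html ... </ul>
#   <!-- END: /incl-nav.html -->
#
# The optional "active:#id" part names an element in the included file that
# should be given class="active".
#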
# Only the lines between START/END are replaced. The START/END lines are
# left in place so the process can be performed repeatedly without needing
# a separate ".in" file.
#
#
# Run this from the top-level directory. Provide a list of all HTML files
# on the command line, using relative paths.
#
# If the name of the file to include (specified on the START line) begins
# with '/', the file will be loaded from the top-level directory, i.e. the
# one where this command was run. If not, the file will be read from the
# directory where the HTML file lives.
#
# find . -name '*.html' -print0 | xargs -0 ./block-repl.py
#
# (Tested with Python v3.8.5)
#
# Copyright 2021 Andy McFadden.
#
import filecmp
import os.path
import re
import sys
class LocalError(Exception):
    """
    Failure detected and reported by this script itself.

    Raised for unreadable/unwritable files and malformed START/END blocks;
    main() catches it, prints the message, and exits with a failure status.
    """
# Regex pattern for block substitution.  A match exposes these groups:
#   0: (full match)
#   1: START filename
#   2: "active" id (optional; None when the START line has no active id)
#   3: middle data (to be replaced)
#   4: END filename
# The START/END names are expected to match.  If not, we probably found the
# end of a different block, and should tell the user that something is off.
#
# MULTILINE makes ^ and $ anchor at line boundaries, so the START and END
# markers must each begin their own line.  DOTALL lets the "." after "$"
# consume the newline that ends the START line, and lets the lazy middle
# group span multiple lines up to the first END marker.
#
# NOTE(review): the marker syntax (HTML comments of the form
# "<!-- START: name active:#id -->" and "<!-- END: name -->") was
# reconstructed from the group list above; confirm it against the markers
# used in the existing HTML files.
findChunk = re.compile(
    r"^\s*<!--\s*START:\s*([\w/.\-]+)\s*(?:active:#([\w\-]+))?\s*-->\s*$."
    r"(.*?)"
    r"^\s*<!--\s*END:\s*([\w/.\-]+)\s*-->",
    re.DOTALL | re.MULTILINE)

# Symbolic names for the match groups described above.
GROUP_ALL = 0
GROUP_START = 1
GROUP_ACTIVE_ID = 2
GROUP_CHUNK = 3
GROUP_END = 4
def editFile(inFileName, outFileName):
    """
    Edit a file, replacing blocks with the contents of other files.

    inFileName: path of the HTML file to read
    outFileName: path of the file to create; it must not already exist

    Everything outside the START/END blocks is copied through unchanged.
    The text between each START line and its END line is replaced by the
    output of copyFromIncl() for the file named on the START line.

    Raises LocalError if either file cannot be opened, if a block's START
    and END names differ, or if copyFromIncl() fails.
    """
    try:
        with open(inFileName, "r") as inFile:
            fileData = inFile.read()
        # Mode "x" refuses to overwrite an existing file, so a leftover
        # output file is reported as an error instead of being clobbered.
        outFile = open(outFileName, "x")
    except IOError as err:
        raise LocalError(err) from err

    # The "with" guarantees the output is flushed and closed even when a
    # block fails part-way through, so the partial file can be inspected.
    with outFile:
        # For each chunk found, replace the contents.
        startPos = 0
        while True:
            match = findChunk.search(fileData, startPos)
            if not match:
                break

            startTag = match.group(GROUP_START)
            endTag = match.group(GROUP_END)
            if startTag != endTag:
                raise LocalError("START/END tag mismatch: " + startTag + " vs. " + endTag)

            replSpan = match.span(GROUP_CHUNK)
            print("== Matched {0}:{1}".format(replSpan[0], replSpan[1]))

            activeId = match.group(GROUP_ACTIVE_ID)
            if activeId:
                print("== active ID: " + activeId)

            # copy everything up to the chunk
            outFile.write(fileData[startPos : replSpan[0]])
            # insert the file, tweaking active ID if appropriate
            copyFromIncl(inFileName, startTag, activeId, outFile)
            # copy the rest of the match
            outFile.write(fileData[replSpan[1] : match.end(GROUP_ALL)])

            # Start next search at end of full search.
            startPos = match.end(GROUP_ALL)
            print("== Start next at {0}".format(startPos))

        print("== done")
        # Copy whatever follows the last block (or the whole file if no
        # block was found).
        outFile.write(fileData[startPos:])
def copyFromIncl(inFileName, tag, activeId, outFile):
    """
    Copy include file in, substituting active ID and path variables when
    appropriate.

    inFileName: relative path to file we're working on
    tag: name of file to include; a leading '/' means it is loaded from the
        current (top-level) directory, otherwise from inFileName's directory
    activeId: ID to mark as active, or None/empty to leave IDs untouched
    outFile: file object to write data to

    Raises LocalError if the include file cannot be read, or if inFileName
    is not a relative path.
    """
    inFileDir = os.path.dirname(inFileName)
    if tag.startswith('/'):
        # file is in top-level (current) directory
        inclFileName = tag[1:]
    else:
        # file is in same directory as input file
        inclFileName = os.path.join(inFileDir, tag)
    print("== replacing section with " + inclFileName)

    try:
        # Use utf-8-sig to skip over Byte Order Marks (BOM).
        with open(inclFileName, "r", encoding="utf-8-sig") as inFile:
            fileData = inFile.read()
    except IOError as err:
        raise LocalError(err) from err

    # Create a relative path for ${ROOT}, which is defined as the directory
    # in which we're running.  The path gets us from the input file's
    # directory back to the root.
    # TODO: make this work correctly for absolute paths?
    if inFileName.startswith('/'):
        raise LocalError("Not a relative path: " + inFileName)
    rootRel = ""
    head = os.path.split(inFileName)[0]
    while head:
        # Each directory component adds one "../"; a leading "./", which
        # you get from find+xargs, is ignored.
        if head != ".":
            rootRel += "../"
        head = os.path.split(head)[0]

    # TODO: consider having a ${LOCAL} ...
    # Create a relative path for ${LOCAL}, which is defined as the directory
    # from which the input file was loaded.  The path gets us from the input
    # file's directory back to the included file's directory
    #
    # Suppose we're working on foo/bar/index.html, which includes
    # ../incl-sidenav.html (i.e. foo/incl-sidenav.html).  We want references
    # to map ${LOCAL}/bar/glob.html to "glob.html", and
    # ${LOCAL}/splat/index.html to "../splat/index.html".  For now, we always
    # use a local offset, so it's actually "../bar/glob.html".  This is a step
    # up from ${ROOT}, which would be "../../foo/bar/glob.html", but not as
    # clever as we could be.
    #
    # What we really want to do is extract the string that follows ${ROOT}
    # from the input file and compute the minimal path, but that requires
    # more effort than a simple variable substitution.  ${LOCAL} would be a
    # half-step, and probably not worth the effort.

    if activeId:
        # Given an HTML element like <li id="some-id">, insert a class
        # assignment: class="active".  The ID to modify is specified by
        # "activeId".  This is a literal text replacement, so no character
        # in the ID is treated specially.
        idAttr = 'id="' + activeId + '"'
        if idAttr in fileData:
            fileData = fileData.replace(idAttr, idAttr + ' class="active"')
        else:
            print("== active ID '" + activeId + "' not found")

    # Replace ${ROOT} with relative path to root directory.  The trailing
    # slash is consumed because rootRel is either empty or ends with "/".
    fileData = fileData.replace("${ROOT}/", rootRel)

    # Copy data to output file.
    outFile.write(fileData)
def main():
    """
    Process every HTML file named on the command line.

    Each file is edited into "<name>_NEW".  If the result is identical to
    the original, the new file is removed so the original's file dates
    don't change; otherwise the new file replaces the original.

    Does not return: exits with status 0 on success, or status 1 after
    printing the error if a LocalError is raised while editing.
    """
    fileNames = sys.argv[1:]

    outFileName = None
    try:
        for name in fileNames:
            print("Processing: " + name)
            outFileName = name + "_NEW"
            editFile(name, outFileName)

            # See if the file has changed.  If it hasn't, keep the original
            # so the file dates don't change.  (shallow=False forces a
            # comparison of the file contents.)
            if filecmp.cmp(name, outFileName, False):
                print("== No changes, removing new")
                os.remove(outFileName)
            else:
                print("== Changed, keeping new")
                # os.replace overwrites the existing original on every
                # platform; os.rename fails on Windows when the target
                # already exists.
                os.replace(outFileName, name)
    except LocalError as err:
        print("ERROR: {0}".format(err))
        if outFileName:
            # The partially written output is left behind for inspection.
            print(" check " + outFileName)
        sys.exit(1)

    sys.exit(0)


# Only run when executed as a script, so importing the module has no
# side effects.
if __name__ == "__main__":
    main()  # does not return