From e5e908363ad8d8ef08a5546a81d788163d4d4ba1 Mon Sep 17 00:00:00 2001 From: Thomas Harte Date: Wed, 23 Dec 2020 22:59:03 -0400 Subject: [PATCH] Add quick build time/size results for a sample templated operation. --- Roadmap-for-Flexibly-Timed-Platforms.md | 73 ++++++++++++++++++++++++- 1 file changed, 72 insertions(+), 1 deletion(-) diff --git a/Roadmap-for-Flexibly-Timed-Platforms.md b/Roadmap-for-Flexibly-Timed-Platforms.md index f4f4b2f..427a87c 100644 --- a/Roadmap-for-Flexibly-Timed-Platforms.md +++ b/Roadmap-for-Flexibly-Timed-Platforms.md @@ -56,4 +56,75 @@ With a workable pattern suitably discovered, apply the same to other architectur Both the Z80 and 6502 both almost have instruction decoders themselves, in the form of the disassemblers. It would probably make sense to formalise those; it also wouldn't necessarily be unproductive: * in 6502 world, it would simplify the logic flow for instruction set selection between the 6502, various 65C02s and possibly the 65816; and -* for the purposes of the Z80, it might aid in unification of CLK and CP/M for OS X. \ No newline at end of file +* for the purposes of the Z80, it might aid in unification of CLK and CP/M for OS X. 
+ +# Practicality of Templating + +I tested the following code, an obvious implementation, with GCC: + +``` +#include + +template struct MultiplyByX { + static int apply(int y) { + return x*y; + } +}; + +template typename Operation, int size> class OperationBank { + public: + OperationBank() { + fill<0, size>(); + } + + int apply(int x, int y) { + return funcs[x](y); + } + + private: + template void fill() { + if constexpr (!depth) { + return; + } + + if constexpr (depth & 1) { + funcs[offset + depth - 1] = &Operation::apply; + } + + fill(); + fill(); + } + + typedef int (*FuncPtr)(int); + FuncPtr funcs[size]; +}; + + +int main() { + OperationBank multipliers; + + // This for loop is contrived, but empirically sufficient to prompt + // GCC not to optimise out the full construction of multipliers above. + for(int c = 0; c < 3; c++) { + printf("%d\n", multipliers.apply(673+c, 3)); + } + + return 0; +} +``` + +I then measured build times and binary sizes with GCC `--std=c++17 -O3` on a 2.6GHz i7 for different values of `size` for the instance of `OperationBank`: + +|`size`|Compile Time (s)|Binary Size (bytes)| +|---|---|---| +|1024|1.317|160,240| +|16384|21.001|2,517,720| +|65536|104.598|10,110,344| + +The close-enough-to-linear change in binary size was taken as confirmation that GCC really was building the full function pointer sets; the worse-than-linear jump in compile time was taken as a warning. + +Confirmed, then: +* it really makes sense to template on a minimal set of fundamentals — likely addressing mode only (albeit in a slightly broad sense, e.g. to include direction for the 8086); +* anything that can branchlessly be read from the instruction stream without undue contortion, such as the specific registers or constants involved, should be read from the instruction stream, not precompiled into overly-specific functions. + +These were sensible assumptions, but are even more sensible as empirical findings. \ No newline at end of file