mirror of
https://github.com/TomHarte/CLK.git
synced 2024-11-21 21:33:54 +00:00
Add quick build time/size results for a sample templated operation.
parent
f6993e01b1
commit
e5e908363a
@ -56,4 +56,75 @@ With a workable pattern suitably discovered, apply the same to other architectur
|
||||
The Z80 and 6502 both almost have instruction decoders themselves, in the form of the disassemblers. It would probably make sense to formalise those; it also wouldn't necessarily be unproductive:
|
||||
|
||||
* in 6502 world, it would simplify the logic flow for instruction set selection between the 6502, various 65C02s and possibly the 65816; and
|
||||
* for the purposes of the Z80, it might aid in unification of CLK and CP/M for OS X.
|
||||
* for the purposes of the Z80, it might aid in unification of CLK and CP/M for OS X.
|
||||
|
||||
# Practicality of Templating
|
||||
|
||||
I tested the following code, an obvious implementation, with GCC:
|
||||
|
||||
```
|
||||
#include <cstdio>
|
||||
|
||||
// Sample operation parameterised at compile time on the multiplier x; each
// distinct x produces a distinct apply() function.
template<int x> struct MultiplyByX {
|
||||
// Returns x*y, where x is the template argument.
static int apply(int y) {
|
||||
return x*y;
|
||||
}
|
||||
};
|
||||
|
||||
// Table of `size` function pointers in which slot i points at
// Operation<i>::apply. The table is populated by the constructor, which
// forces an instantiation of Operation<i> for every i in [0, size).
template<template<int x> typename Operation, int size> class OperationBank {
|
||||
public:
|
||||
// Fills every slot of funcs.
OperationBank() {
|
||||
fill<0, size>();
|
||||
}
|
||||
|
||||
// Calls the function stored in slot x with argument y and returns its result.
// x is used as an array index without a range check.
int apply(int x, int y) {
|
||||
return funcs[x](y);
|
||||
}
|
||||
|
||||
private:
|
||||
// Populates funcs[offset] to funcs[offset + depth - 1] by recursive halving:
// an odd depth stores its last slot directly, then each half of the
// remaining (even) range is handled by a recursive instantiation.
template<int offset, int depth> void fill() {
|
||||
// Empty range: nothing to store. Only this braced return is governed by the
// if constexpr; the statements below are not in an else branch, so they are
// still instantiated for depth 0 (depth / 2 is then also 0, so the
// recursive calls name this same specialisation).
if constexpr (!depth) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Odd depth: the two halves below cover only depth - 1 slots, so store the
// final slot of the range here.
if constexpr (depth & 1) {
|
||||
funcs[offset + depth - 1] = &Operation<offset + depth - 1>::apply;
|
||||
}
|
||||
|
||||
// Lower half, then upper half, each of depth / 2 slots.
fill<offset, depth / 2>();
|
||||
fill<offset + (depth / 2), depth / 2>();
|
||||
}
|
||||
|
||||
// Pointer to a function taking and returning int, i.e. the signature of
// Operation<i>::apply as used above.
typedef int (*FuncPtr)(int);
|
||||
// One entry per index in [0, size).
FuncPtr funcs[size];
|
||||
};
|
||||
|
||||
|
||||
// Builds a 65536-entry bank of MultiplyByX operations and prints the result
// of applying entries 673, 674 and 675 to the value 3.
int main() {
|
||||
OperationBank<MultiplyByX, 65536> multipliers;
|
||||
|
||||
// This for loop is contrived, but empirically sufficient to prompt
|
||||
// GCC not to optimise out the full construction of multipliers above.
|
||||
for(int c = 0; c < 3; c++) {
|
||||
printf("%d\n", multipliers.apply(673+c, 3));
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
```
|
||||
|
||||
I then measured build times and binary sizes with GCC `--std=c++17 -O3` on a 2.6GHz i7 for different values of `size` in the instance of `OperationBank`:
|
||||
|
||||
|`size`|Compile Time (s)|Binary Size (bytes)|
|
||||
|---|---|---|
|
||||
|1024|1.317|160,240|
|
||||
|16384|21.001|2,517,720|
|
||||
|65536|104.598|10,110,344|
|
||||
|
||||
The close-enough-to-linear change in binary size was taken as confirmation that GCC really was building the full function pointer sets; the worse-than-linear jump in compile time was taken as a warning.
|
||||
|
||||
Confirmed, then:
|
||||
* it really makes sense to template on a minimal set of fundamentals — likely addressing mode only (albeit in the slightly broad sense, e.g. to include direction for the 8086);
|
||||
* anything that can branchlessly be read from the instruction stream without undue contortion, such as the specific registers or constants involved, should be read from the instruction stream, not precompiled into overly-specific functions.
|
||||
|
||||
These were sensible assumptions, but are even more sensible as empirical findings.
|
Loading…
Reference in New Issue
Block a user