diff --git a/presenter_demo/Makefile b/presenter_demo/Makefile index 553d5969..69b211a6 100644 --- a/presenter_demo/Makefile +++ b/presenter_demo/Makefile @@ -6,17 +6,24 @@ all: presenter_demo.dsk presenter_demo.dsk: PRESENTER_DEMO \ TITLE.HGR PLASMA_DPOSV.HGR \ - RAPL_MICRO.HGR \ - NUM.SHAPE MMM + RAPL_MICRO.HGR WATTSUP.HGR \ + NUM.SHAPE MMM LOAD_DEMO ../dos33 presenter_demo.dsk SAVE A PRESENTER_DEMO ../dos33 presenter_demo.dsk SAVE B TITLE.HGR ../dos33 presenter_demo.dsk SAVE B PLASMA_DPOSV.HGR ../dos33 presenter_demo.dsk SAVE B RAPL_MICRO.HGR ../dos33 presenter_demo.dsk SAVE B NUM.SHAPE + ../dos33 presenter_demo.dsk SAVE B WATTSUP.HGR ../dos33 presenter_demo.dsk SAVE A MMM + ../dos33 presenter_demo.dsk SAVE A LOAD_DEMO +LOAD_DEMO: load_demo.bas + ../tokenize_asoft < load_demo.bas > LOAD_DEMO -RAPL_MICRO.HGR: rapl_micro.pcx +WATTSUP.HGR: wattsup.pcx + ../pcx2hgr wattsup.pcx > WATTSUP.HGR + +RAPL_MICRO.HGR: rapl_micro.pcx ../pcx2hgr rapl_micro.pcx > RAPL_MICRO.HGR @@ -39,5 +46,5 @@ demo.bas: ../asoft_presenter info ../asoft_presenter . 
> demo.bas clean: - rm -f *~ *.IMG *.HGR demo.bas PRESENTER_DEMO NUM.SHAPE MMM + rm -f *~ *.IMG *.HGR demo.bas PRESENTER_DEMO NUM.SHAPE MMM LOAD_DEMO diff --git a/presenter_demo/info b/presenter_demo/info index d4347d7e..80dca343 100644 --- a/presenter_demo/info +++ b/presenter_demo/info @@ -8,12 +8,36 @@ EMAIL vweaver1@eecs.utk.edu SLIDES -slide_nvml_plot -slide_wattsup_plot -slide_rapl_intro -slide_piotr_img -slide_rapl_continued -slide_rapl_more -slide_rapl_finish slide_title +slide_01_intro +slide_02_intro +slide_03_piotr +slide_04_shortcomings +slide_05_papi +slide_06_papi_benefits +slide_07_current_components +slide_08_wattsup_pic +slide_09_wattsup +slide_10_wattsup_plot +slide_11_rapl_intro +slide_12_rapl_works +slide_13_rapl_more +slide_14_rapl_valid +slide_15_rapl_accuracy +slide_16_rapl_papi +slide_17_rapl_power +slide_18_rapl_energy +slide_19_nvml +slide_20_nvml_plot +slide_21_future_papi +slide_22_amd +slide_23_powermon +slide_24_user +slide_25_measuring +slide_26_events +slide_27_multiple +slide_28_questions +slide_29_appleIIe +slide_30_linpack +slide_31_questions END_SLIDES diff --git a/presenter_demo/load_demo.bas b/presenter_demo/load_demo.bas new file mode 100644 index 00000000..79aacc30 --- /dev/null +++ b/presenter_demo/load_demo.bas @@ -0,0 +1,5 @@ +10 REM LOAD BASIC PROGRAM INTO HGR2 AREA +15 PRINT "LOADING PRESENTER DEMO TO A$4000" +20 POKE 104,64 +30 POKE 16384,0 +40 PRINT CHR$(4);"LOAD PRESENTER_DEMO" diff --git a/presenter_demo/slide_01_intro b/presenter_demo/slide_01_intro new file mode 100644 index 00000000..6111e35a --- /dev/null +++ b/presenter_demo/slide_01_intro @@ -0,0 +1,11 @@ +40COL +%c%Power and Energy - Why do We Care? 
+ +* New, massive, HPC machines use + impressive amounts of power + +* When you have 100k+ cores, saving + a few Joules per core quickly adds up + +* To improve power/energy draw, you + need some way of measuring it diff --git a/presenter_demo/slide_02_intro b/presenter_demo/slide_02_intro new file mode 100644 index 00000000..9c818c72 --- /dev/null +++ b/presenter_demo/slide_02_intro @@ -0,0 +1,18 @@ +40COL +%c%Energy/Power Measurement is Already +%c%Possible + +Three common ways of doing this: + +* Hand-instrumenting a system by + tapping all power inputs to CPU, + memory, disk, etc., and using a + data logger + +* Using a pass-through power meter + that you plug your server into. + Often these will log over USB + +* Estimating power/energy with a + software model based on + system behavior diff --git a/presenter_demo/slide_03_piotr b/presenter_demo/slide_03_piotr new file mode 100644 index 00000000..d8b53372 --- /dev/null +++ b/presenter_demo/slide_03_piotr @@ -0,0 +1,5 @@ +HGR +PLASMA_DPOSV.HGR +%c%Existing Related Work +%c%Plasma/dposv results with +%c%Virginia Tech's PowerPack diff --git a/presenter_demo/slide_04_shortcomings b/presenter_demo/slide_04_shortcomings new file mode 100644 index 00000000..0c92f4dc --- /dev/null +++ b/presenter_demo/slide_04_shortcomings @@ -0,0 +1,14 @@ +40COL +%c%Shortcomings of Current Methods + +* Each measurement platform has a + different interface + +* Typically data can only be recorded + off-line, to a separate logging + machine, and analysis is done after + the fact + +* Correlating energy/power with other + performance metrics can be difficult + diff --git a/presenter_demo/slide_05_papi b/presenter_demo/slide_05_papi new file mode 100644 index 00000000..3b38c258 --- /dev/null +++ b/presenter_demo/slide_05_papi @@ -0,0 +1,15 @@ +40COL +%c%Can we make this easier? + +%c%Use PAPI! 
+ +* PAPI (Performance API) is a + platform-independent library for + gathering performance-related data + +* PAPI-C interface makes adding new + power measuring components + straightforward + +* PAPI can provide power/energy + results in-line to running programs diff --git a/presenter_demo/slide_06_papi_benefits b/presenter_demo/slide_06_papi_benefits new file mode 100644 index 00000000..d5ac6ebb --- /dev/null +++ b/presenter_demo/slide_06_papi_benefits @@ -0,0 +1,16 @@ +40COL +%c%More PAPI benefits + +* One interface for all power + measurement devices + +* Existing PAPI code and + instrumentation can easily be + extended to measure power + +* Existing high-level tools (Tau, + VAMPIR, etc.) can be used with + no changes + +* Easy to measure other performance + metrics at same time diff --git a/presenter_demo/slide_07_current_components b/presenter_demo/slide_07_current_components new file mode 100644 index 00000000..f7b4feec --- /dev/null +++ b/presenter_demo/slide_07_current_components @@ -0,0 +1,8 @@ +40COL +%c%Current PAPI Components + +* Various components are nearing + completion + +* Code for many of them already + available in papi.git diff --git a/presenter_demo/slide_08_wattsup_pic b/presenter_demo/slide_08_wattsup_pic new file mode 100644 index 00000000..53ea9a46 --- /dev/null +++ b/presenter_demo/slide_08_wattsup_pic @@ -0,0 +1,3 @@ +HGR +WATTSUP.HGR +%c%Watt's Up Pro Meter diff --git a/presenter_demo/slide_09_wattsup b/presenter_demo/slide_09_wattsup new file mode 100644 index 00000000..101e3758 --- /dev/null +++ b/presenter_demo/slide_09_wattsup @@ -0,0 +1,14 @@ +40COL +%c%Watt's Up Pro Features + +* Can measure 18 different values + with 1 second resolution + (Watts, Volts, Amps, etc.) 
+ +* Values read over USB + +* Joules can be derived from power + and time + +* Can only measure system-wide + diff --git a/presenter_demo/slide_wattsup_plot b/presenter_demo/slide_10_wattsup_plot similarity index 88% rename from presenter_demo/slide_wattsup_plot rename to presenter_demo/slide_10_wattsup_plot index 4d3ad1d9..52cad79a 100644 --- a/presenter_demo/slide_wattsup_plot +++ b/presenter_demo/slide_10_wattsup_plot @@ -61,6 +61,6 @@ START 38 35.4 39 35.1 STOP - Average Power (W) versus Time (s) +%c%Average Power (W) versus Time (s) - PLASMA Cholesky on Intel Core2 +%c%PLASMA Cholesky on Intel Core2 diff --git a/presenter_demo/slide_11_rapl_intro b/presenter_demo/slide_11_rapl_intro new file mode 100644 index 00000000..04394f6e --- /dev/null +++ b/presenter_demo/slide_11_rapl_intro @@ -0,0 +1,12 @@ +40COL +%c%RAPL + +* Running Average Power Limit + +* Part of an infrastructure to allow + setting custom per-package hardware + enforced power limits + +* User Accessible Energy/Power + readings are a bonus feature + of the interface diff --git a/presenter_demo/slide_12_rapl_works b/presenter_demo/slide_12_rapl_works new file mode 100644 index 00000000..b4ac96d9 --- /dev/null +++ b/presenter_demo/slide_12_rapl_works @@ -0,0 +1,17 @@ +40COL +%c%How RAPL Works + +* RAPL is not an analog power meter + +* RAPL uses a software power model, + running on a helper controller + on the main chip package + +* Energy is estimated using various + hardware performance counters, + temperature, leakage models and I/O + +* The model is used for CPU throttling + and turbo-boost, but the values are + also exposed to users via a + model-specific register (MSR) diff --git a/presenter_demo/slide_13_rapl_more b/presenter_demo/slide_13_rapl_more new file mode 100644 index 00000000..5ab39214 --- /dev/null +++ b/presenter_demo/slide_13_rapl_more @@ -0,0 +1,18 @@ +40COL +%c%Available RAPL Readings + +* PACKAGE_ENERGY: total energy used + by entire package + +* PP0_ENERGY: energy used by + 
'Power Plane 0' which includes + all cores and caches + +* PP1_ENERGY: on original Sandybridge + this includes the on-chip Intel GPU + +* DRAM_ENERGY: on Sandybridge EP this + measures DRAM energy usage. + It is unclear whether this is just + the interface or if it includes all + power used by all the DIMMs too diff --git a/presenter_demo/slide_14_rapl_valid b/presenter_demo/slide_14_rapl_valid new file mode 100644 index 00000000..fbcb61b6 --- /dev/null +++ b/presenter_demo/slide_14_rapl_valid @@ -0,0 +1,4 @@ +HGR +RAPL_MICRO.HGR +%c%Rotem et al. (IEEE Micro Mar/Apr 2012) +%c%Validate Against Actual Power Readings diff --git a/presenter_demo/slide_15_rapl_accuracy b/presenter_demo/slide_15_rapl_accuracy new file mode 100644 index 00000000..90408a7b --- /dev/null +++ b/presenter_demo/slide_15_rapl_accuracy @@ -0,0 +1,19 @@ +40COL +%c%RAPL Measurement Accuracy + +* Intel Documentation indicates Energy + readings are updated roughly every + millisecond (1kHz) + +* The hardware also reports measurement + quanta. This can vary among processor + releases. On our Sandybridge EP all + Energy measurements are in multiples + of 15.2nJ + +* Power and Energy can vary between + identical packages on a system, + even when running identical workloads. + It is unclear whether this is due to + process variation or else calibration + diff --git a/presenter_demo/slide_16_rapl_papi b/presenter_demo/slide_16_rapl_papi new file mode 100644 index 00000000..4d180e6b --- /dev/null +++ b/presenter_demo/slide_16_rapl_papi @@ -0,0 +1,17 @@ +40COL +%c%RAPL PAPI Interface + +* Access to RAPL data requires reading + a CPU MSR register. + This requires OS support + +* Linux currently has no driver and + likely won't for the near future + +* Linux does support an 'MSR' driver. 
+ Given proper read permissions, MSRs + can be accessed via /dev/cpu/*/msr + +* PAPI uses the 'MSR' driver to gather + RAPL values + diff --git a/presenter_demo/slide_17_rapl_power b/presenter_demo/slide_17_rapl_power new file mode 100644 index 00000000..d2b5cb1f --- /dev/null +++ b/presenter_demo/slide_17_rapl_power @@ -0,0 +1,3 @@ +40COL +BLAH + diff --git a/presenter_demo/slide_18_rapl_energy b/presenter_demo/slide_18_rapl_energy new file mode 100644 index 00000000..d2b5cb1f --- /dev/null +++ b/presenter_demo/slide_18_rapl_energy @@ -0,0 +1,3 @@ +40COL +BLAH + diff --git a/presenter_demo/slide_19_nvml b/presenter_demo/slide_19_nvml new file mode 100644 index 00000000..6f91047d --- /dev/null +++ b/presenter_demo/slide_19_nvml @@ -0,0 +1,13 @@ +40COL +%c%NVML + +* Recent NVIDIA GPUs support reading + power via the NVIDIA Management + Library (NVML) + +* On Fermi C2075 GPUs it has milliwatt + resolution within +/- 5W and is + updated at roughly 60Hz + +* The power reported is that for the + entire board, GPU and memory diff --git a/presenter_demo/slide_nvml_plot b/presenter_demo/slide_20_nvml_plot similarity index 99% rename from presenter_demo/slide_nvml_plot rename to presenter_demo/slide_20_nvml_plot index 0b43ef1a..518a4867 100644 --- a/presenter_demo/slide_nvml_plot +++ b/presenter_demo/slide_20_nvml_plot @@ -287,3 +287,4 @@ START 2.66 122.18 2.67 117.74 STOP +%c%MAGMA LU 10,000, Nvidia Fermi C2075 diff --git a/presenter_demo/slide_21_future_papi b/presenter_demo/slide_21_future_papi new file mode 100644 index 00000000..dc8a8a69 --- /dev/null +++ b/presenter_demo/slide_21_future_papi @@ -0,0 +1,6 @@ +40COL +%c%Near-future PAPI Components + +%c%These components do not exist yet, +%c%but support for them should be +%c%straightforward. 
diff --git a/presenter_demo/slide_22_amd b/presenter_demo/slide_22_amd new file mode 100644 index 00000000..163cad32 --- /dev/null +++ b/presenter_demo/slide_22_amd @@ -0,0 +1,14 @@ +40COL +%c%AMD Application Power Management + +* Recent AMD Family 15h processors + report 'Current Power In Watts' + via Processor Power in the TDP MSR + +* Support for this can be provided + similar to RAPL + +* We just need an Interlagos system + where someone gives us read + permissions to /dev/cpu/*/msr + diff --git a/presenter_demo/slide_23_powermon b/presenter_demo/slide_23_powermon new file mode 100644 index 00000000..66245472 --- /dev/null +++ b/presenter_demo/slide_23_powermon @@ -0,0 +1,12 @@ +40COL +%c%PowerMon 2 + +* PowerMon 2: a custom board from RENCI + +* Plugs in-line with ATX power supply + +* Reports results over USB + +* 8 channels, 1kHz sample rate + +* We have hardware; still debugging diff --git a/presenter_demo/slide_24_user b/presenter_demo/slide_24_user new file mode 100644 index 00000000..15945f00 --- /dev/null +++ b/presenter_demo/slide_24_user @@ -0,0 +1,14 @@ +40COL +%c%PAPI-based Power Models + +* There's a lot of related work on + estimating energy/power using + performance counters + +* PAPI user-defined events can be + used to create power models using + existing events + +* Previous work (McKee et al.) shows + accuracy to within 10%% + diff --git a/presenter_demo/slide_25_measuring b/presenter_demo/slide_25_measuring new file mode 100644 index 00000000..0116ac62 --- /dev/null +++ b/presenter_demo/slide_25_measuring @@ -0,0 +1,7 @@ +40COL +%c%Measuring using PAPI + +* Measuring Energy/Power with PAPI is + done the same as measuring any other + event + diff --git a/presenter_demo/slide_26_events b/presenter_demo/slide_26_events new file mode 100644 index 00000000..977a30cf --- /dev/null +++ b/presenter_demo/slide_26_events @@ -0,0 +1,19 @@ +40COL +%c%Listing Events + +> papi_native_avail +... 
+==================================== + Events in Component: linux-rapl +==================================== +------------------------------------ +| PACKAGE_ENERGY:PACKAGE0 +| Energy used by chip package 0 +------------------------------------ +| PACKAGE_ENERGY:PACKAGE1 +| Energy used by chip package 1 +------------------------------------ +| DRAM_ENERGY:PACKAGE0 +| Energy used by DRAM on package 0 +------------------------------------ +... diff --git a/presenter_demo/slide_27_multiple b/presenter_demo/slide_27_multiple new file mode 100644 index 00000000..4aae1d52 --- /dev/null +++ b/presenter_demo/slide_27_multiple @@ -0,0 +1,4 @@ +40COL +%c%Measuring Multiple at Once + + diff --git a/presenter_demo/slide_28_questions b/presenter_demo/slide_28_questions new file mode 100644 index 00000000..6bebb707 --- /dev/null +++ b/presenter_demo/slide_28_questions @@ -0,0 +1,7 @@ +40COL + + + + + +%c%Questions before Digressing? diff --git a/presenter_demo/slide_29_appleIIe b/presenter_demo/slide_29_appleIIe new file mode 100644 index 00000000..4f2f944b --- /dev/null +++ b/presenter_demo/slide_29_appleIIe @@ -0,0 +1,10 @@ +40COL +%c%Apple IIe + +* Apple II released in 1977 +* Apple IIe Platinum released in 1987 +* 1MHz 65C02 Processor, 128kB RAM +* 280x192, 6-color graphics + (IIe can do DoubleHiRes) +* Power: 18 - 20W + diff --git a/presenter_demo/slide_30_linpack b/presenter_demo/slide_30_linpack new file mode 100644 index 00000000..f3db585a --- /dev/null +++ b/presenter_demo/slide_30_linpack @@ -0,0 +1,12 @@ +40COL +%c%Linpack Results + +10x10 Matrix-matrix multiply +START +STOP +HOW MANY SECONDS? 15 +133.333333 FLOP/s + +Yes I know using BASIC is unfair +But I am too lazy to code up a +6502 FP implementation in assembler diff --git a/presenter_demo/slide_31_questions b/presenter_demo/slide_31_questions new file mode 100644 index 00000000..fd06b95c --- /dev/null +++ b/presenter_demo/slide_31_questions @@ -0,0 +1,7 @@ +40COL + + + + +%c%QUESTIONS? 
+ diff --git a/presenter_demo/slide_piotr_img b/presenter_demo/slide_piotr_img deleted file mode 100644 index 18d744cb..00000000 --- a/presenter_demo/slide_piotr_img +++ /dev/null @@ -1,2 +0,0 @@ -HGR -PLASMA_DPOSV.HGR diff --git a/presenter_demo/slide_rapl_continued b/presenter_demo/slide_rapl_continued deleted file mode 100644 index 80b2969d..00000000 --- a/presenter_demo/slide_rapl_continued +++ /dev/null @@ -1,3 +0,0 @@ -40COL -RAPL CONTINUE -* Is freally awesome diff --git a/presenter_demo/slide_rapl_finish b/presenter_demo/slide_rapl_finish deleted file mode 100644 index 03976c34..00000000 --- a/presenter_demo/slide_rapl_finish +++ /dev/null @@ -1,3 +0,0 @@ -40COL -RAPL LAST -* Is really awesome diff --git a/presenter_demo/slide_rapl_intro b/presenter_demo/slide_rapl_intro deleted file mode 100644 index ffcfbad7..00000000 --- a/presenter_demo/slide_rapl_intro +++ /dev/null @@ -1,4 +0,0 @@ -40COL -#c#RAPL - -* Is really awesome diff --git a/presenter_demo/slide_rapl_more b/presenter_demo/slide_rapl_more deleted file mode 100644 index bcdfb12b..00000000 --- a/presenter_demo/slide_rapl_more +++ /dev/null @@ -1,3 +0,0 @@ -40COL -RAPL MORE -* Is really awesome