mirror of
https://github.com/deater/dos33fsprogs.git
synced 2025-05-13 01:47:07 +00:00
Update slides
This commit is contained in:
parent
9094ffafbb
commit
772f596bb9
@ -6,17 +6,24 @@ all: presenter_demo.dsk
|
|||||||
|
|
||||||
presenter_demo.dsk: PRESENTER_DEMO \
|
presenter_demo.dsk: PRESENTER_DEMO \
|
||||||
TITLE.HGR PLASMA_DPOSV.HGR \
|
TITLE.HGR PLASMA_DPOSV.HGR \
|
||||||
RAPL_MICRO.HGR \
|
RAPL_MICRO.HGR WATTSUP.HGR \
|
||||||
NUM.SHAPE MMM
|
NUM.SHAPE MMM LOAD_DEMO
|
||||||
../dos33 presenter_demo.dsk SAVE A PRESENTER_DEMO
|
../dos33 presenter_demo.dsk SAVE A PRESENTER_DEMO
|
||||||
../dos33 presenter_demo.dsk SAVE B TITLE.HGR
|
../dos33 presenter_demo.dsk SAVE B TITLE.HGR
|
||||||
../dos33 presenter_demo.dsk SAVE B PLASMA_DPOSV.HGR
|
../dos33 presenter_demo.dsk SAVE B PLASMA_DPOSV.HGR
|
||||||
../dos33 presenter_demo.dsk SAVE B RAPL_MICRO.HGR
|
../dos33 presenter_demo.dsk SAVE B RAPL_MICRO.HGR
|
||||||
../dos33 presenter_demo.dsk SAVE B NUM.SHAPE
|
../dos33 presenter_demo.dsk SAVE B NUM.SHAPE
|
||||||
|
../dos33 presenter_demo.dsk SAVE B WATTSUP.HGR
|
||||||
../dos33 presenter_demo.dsk SAVE A MMM
|
../dos33 presenter_demo.dsk SAVE A MMM
|
||||||
|
../dos33 presenter_demo.dsk SAVE A LOAD_DEMO
|
||||||
|
|
||||||
|
LOAD_DEMO: load_demo.bas
|
||||||
|
../tokenize_asoft < load_demo.bas > LOAD_DEMO
|
||||||
|
|
||||||
RAPL_MICRO.HGR: rapl_micro.pcx
|
WATTSUP.HGR: wattsup.pcx
|
||||||
|
../pcx2hgr wattsup.pcx > WATTSUP.HGR
|
||||||
|
|
||||||
|
RAPL_MICRO.HGR: rapl_micro.pcx
|
||||||
../pcx2hgr rapl_micro.pcx > RAPL_MICRO.HGR
|
../pcx2hgr rapl_micro.pcx > RAPL_MICRO.HGR
|
||||||
|
|
||||||
|
|
||||||
@ -39,5 +46,5 @@ demo.bas: ../asoft_presenter info
|
|||||||
../asoft_presenter . > demo.bas
|
../asoft_presenter . > demo.bas
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
rm -f *~ *.IMG *.HGR demo.bas PRESENTER_DEMO NUM.SHAPE MMM
|
rm -f *~ *.IMG *.HGR demo.bas PRESENTER_DEMO NUM.SHAPE MMM LOAD_DEMO
|
||||||
|
|
||||||
|
@ -8,12 +8,36 @@ EMAIL
|
|||||||
vweaver1@eecs.utk.edu
|
vweaver1@eecs.utk.edu
|
||||||
|
|
||||||
SLIDES
|
SLIDES
|
||||||
slide_nvml_plot
|
|
||||||
slide_wattsup_plot
|
|
||||||
slide_rapl_intro
|
|
||||||
slide_piotr_img
|
|
||||||
slide_rapl_continued
|
|
||||||
slide_rapl_more
|
|
||||||
slide_rapl_finish
|
|
||||||
slide_title
|
slide_title
|
||||||
|
slide_01_intro
|
||||||
|
slide_02_intro
|
||||||
|
slide_03_piotr
|
||||||
|
slide_04_shortcomings
|
||||||
|
slide_05_papi
|
||||||
|
slide_06_papi_benefits
|
||||||
|
slide_07_current_components
|
||||||
|
slide_08_wattsup_pic
|
||||||
|
slide_09_wattsup
|
||||||
|
slide_10_wattsup_plot
|
||||||
|
slide_11_rapl_intro
|
||||||
|
slide_12_rapl_works
|
||||||
|
slide_13_rapl_more
|
||||||
|
slide_14_rapl_valid
|
||||||
|
slide_15_rapl_accuracy
|
||||||
|
slide_16_rapl_papi
|
||||||
|
slide_17_rapl_power
|
||||||
|
slide_18_rapl_energy
|
||||||
|
slide_19_nvml
|
||||||
|
slide_20_nvml_plot
|
||||||
|
slide_21_future_papi
|
||||||
|
slide_22_amd
|
||||||
|
slide_23_powermon
|
||||||
|
slide_24_user
|
||||||
|
slide_25_measuring
|
||||||
|
slide_26_events
|
||||||
|
slide_27_multiple
|
||||||
|
slide_28_questions
|
||||||
|
slide_29_appleIIe
|
||||||
|
slide_30_linpack
|
||||||
|
slide_31_questions
|
||||||
END_SLIDES
|
END_SLIDES
|
||||||
|
5
presenter_demo/load_demo.bas
Normal file
5
presenter_demo/load_demo.bas
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
10 REM LOAD BASIC PROGRAM INTO HGR2 AREA
|
||||||
|
15 PRINT "LOADING PRESENTER DEMO TO A$4000"
|
||||||
|
20 POKE 104,64
|
||||||
|
30 POKE 16384,0
|
||||||
|
40 PRINT CHR$(4);"LOAD PRESENTER_DEMO"
|
11
presenter_demo/slide_01_intro
Normal file
11
presenter_demo/slide_01_intro
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
40COL
|
||||||
|
%c%Power and Energy - Why do We Care?
|
||||||
|
|
||||||
|
* New, massive, HPC machines use
|
||||||
|
impressive amounts of power
|
||||||
|
|
||||||
|
* When you have 100k+ cores, saving
|
||||||
|
a few Joules per core quickly adds up
|
||||||
|
|
||||||
|
* To improve power/energy draw, you
|
||||||
|
need some way of measuring it
|
18
presenter_demo/slide_02_intro
Normal file
18
presenter_demo/slide_02_intro
Normal file
@ -0,0 +1,18 @@
|
|||||||
|
40COL
|
||||||
|
%c%Energy/Power Measurement is Already
|
||||||
|
%c%Possible
|
||||||
|
|
||||||
|
Three common ways of doing this:
|
||||||
|
|
||||||
|
* Hand-instrumenting a system by
|
||||||
|
tapping all power inputs to CPU,
|
||||||
|
memory, disk, etc., and using a
|
||||||
|
data logger
|
||||||
|
|
||||||
|
* Using a pass-through power meter
|
||||||
|
that you plug your server into.
|
||||||
|
Often these will log over USB
|
||||||
|
|
||||||
|
* Estimating power/energy with a
|
||||||
|
software model based on
|
||||||
|
system behavior
|
5
presenter_demo/slide_03_piotr
Normal file
5
presenter_demo/slide_03_piotr
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
HGR
|
||||||
|
PLASMA_DPOSV.HGR
|
||||||
|
%c%Existing Related Work
|
||||||
|
%c%Plasma/dposv results with
|
||||||
|
%c%Virginia Tech's PowerPack
|
14
presenter_demo/slide_04_shortcomings
Normal file
14
presenter_demo/slide_04_shortcomings
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
40COL
|
||||||
|
%c%Shortcomings of Current Methods
|
||||||
|
|
||||||
|
* Each measurement platform has a
|
||||||
|
different interface
|
||||||
|
|
||||||
|
* Typically data can only be recorded
|
||||||
|
off-line, to a separate logging
|
||||||
|
machine, and analysis is done after
|
||||||
|
the fact
|
||||||
|
|
||||||
|
* Correlating energy/power with other
|
||||||
|
performance metrics can be difficult
|
||||||
|
|
15
presenter_demo/slide_05_papi
Normal file
15
presenter_demo/slide_05_papi
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
40COL
|
||||||
|
%c%Can we make this easier?
|
||||||
|
|
||||||
|
%c%Use PAPI!
|
||||||
|
|
||||||
|
* PAPI (Performance API) is a
|
||||||
|
platform-independent library for
|
||||||
|
gathering performance-related data
|
||||||
|
|
||||||
|
* PAPI-C interface makes adding new
|
||||||
|
power measuring components
|
||||||
|
straightforward
|
||||||
|
|
||||||
|
* PAPI can provide power/energy
|
||||||
|
results in-line to running programs
|
16
presenter_demo/slide_06_papi_benefits
Normal file
16
presenter_demo/slide_06_papi_benefits
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
40COL
|
||||||
|
%c%More PAPI benefits
|
||||||
|
|
||||||
|
* One interface for all power
|
||||||
|
measurement devices
|
||||||
|
|
||||||
|
* Existing PAPI code and
|
||||||
|
instrumentation can easily be
|
||||||
|
extended to measure power
|
||||||
|
|
||||||
|
* Existing high-level tools (Tau,
|
||||||
|
VAMPIR, etc.) can be used with
|
||||||
|
no changes
|
||||||
|
|
||||||
|
* Easy to measure other performance
|
||||||
|
metrics at same time
|
8
presenter_demo/slide_07_current_components
Normal file
8
presenter_demo/slide_07_current_components
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
40COL
|
||||||
|
%c%Current PAPI Components
|
||||||
|
|
||||||
|
* Various components are nearing
|
||||||
|
completion
|
||||||
|
|
||||||
|
* Code for many of them already
|
||||||
|
available in papi.git
|
3
presenter_demo/slide_08_wattsup_pic
Normal file
3
presenter_demo/slide_08_wattsup_pic
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
HGR
|
||||||
|
WATTSUP.HGR
|
||||||
|
%c%Watt's Up Pro Meter
|
14
presenter_demo/slide_09_wattsup
Normal file
14
presenter_demo/slide_09_wattsup
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
40COL
|
||||||
|
%c%Watt's Up Pro Features
|
||||||
|
|
||||||
|
* Can measure 18 different values
|
||||||
|
with 1 second resolution
|
||||||
|
(Watts, Volts, Amps, etc.)
|
||||||
|
|
||||||
|
* Values read over USB
|
||||||
|
|
||||||
|
* Joules can be derived from power
|
||||||
|
and time
|
||||||
|
|
||||||
|
* Can only measure system-wide
|
||||||
|
|
@ -61,6 +61,6 @@ START
|
|||||||
38 35.4
|
38 35.4
|
||||||
39 35.1
|
39 35.1
|
||||||
STOP
|
STOP
|
||||||
Average Power (W) versus Time (s)
|
%c%Average Power (W) versus Time (s)
|
||||||
|
|
||||||
PLASMA Cholesky on Intel Core2
|
%c%PLASMA Cholesky on Intel Core2
|
12
presenter_demo/slide_11_rapl_intro
Normal file
12
presenter_demo/slide_11_rapl_intro
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
40COL
|
||||||
|
%c%RAPL
|
||||||
|
|
||||||
|
* Running Average Power Limit
|
||||||
|
|
||||||
|
* Part of an infrastructure to allow
|
||||||
|
setting custom per-package hardware
|
||||||
|
enforced power limits
|
||||||
|
|
||||||
|
* User Accessible Energy/Power
|
||||||
|
readings are a bonus feature
|
||||||
|
of the interface
|
17
presenter_demo/slide_12_rapl_works
Normal file
17
presenter_demo/slide_12_rapl_works
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
40COL
|
||||||
|
%c%How RAPL Works
|
||||||
|
|
||||||
|
* RAPL is not an analog power meter
|
||||||
|
|
||||||
|
* RAPL uses a software power model,
|
||||||
|
running on a helper controller
|
||||||
|
on the main chip package
|
||||||
|
|
||||||
|
* Energy is estimated using various
|
||||||
|
hardware performance counters,
|
||||||
|
temperature, leakage models and I/O
|
||||||
|
|
||||||
|
* The model is used for CPU throttling
|
||||||
|
and turbo-boost, but the values are
|
||||||
|
also exposed to users via a
|
||||||
|
model-specific register (MSR)
|
18
presenter_demo/slide_13_rapl_more
Normal file
18
presenter_demo/slide_13_rapl_more
Normal file
@ -0,0 +1,18 @@
|
|||||||
|
40COL
|
||||||
|
%c%Available RAPL Readings
|
||||||
|
|
||||||
|
* PACKAGE_ENERGY: total energy used
|
||||||
|
by entire package
|
||||||
|
|
||||||
|
* PP0_ENERGY: energy used by
|
||||||
|
'Power Plane 0' which includes
|
||||||
|
all cores and caches
|
||||||
|
|
||||||
|
* PP1_ENERGY: on original Sandybridge
|
||||||
|
this includes the on-chip Intel GPU
|
||||||
|
|
||||||
|
* DRAM_ENERGY: on Sandybridge EP this
|
||||||
|
measures DRAM energy usage.
|
||||||
|
It is unclear whether this is just
|
||||||
|
the interface or if it includes all
|
||||||
|
power used by all the DIMMs too
|
4
presenter_demo/slide_14_rapl_valid
Normal file
4
presenter_demo/slide_14_rapl_valid
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
HGR
|
||||||
|
RAPL_MICRO.HGR
|
||||||
|
%c%Rotem et al. (IEEE Micro Mar/Apr 2012)
|
||||||
|
%c%Validate Against Actual Power Readings
|
19
presenter_demo/slide_15_rapl_accuracy
Normal file
19
presenter_demo/slide_15_rapl_accuracy
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
40COL
|
||||||
|
%c%RAPL Measurement Accuracy
|
||||||
|
|
||||||
|
* Intel Documentation indicates Energy
|
||||||
|
readings are updated roughly every
|
||||||
|
millisecond (1kHz)
|
||||||
|
|
||||||
|
* The hardware also reports measurement
|
||||||
|
quanta. This can vary among processor
|
||||||
|
releases. On our Sandybridge EP all
|
||||||
|
Energy measurements are in multiples
|
||||||
|
of 15.2nJ
|
||||||
|
|
||||||
|
* Power and Energy can vary between
|
||||||
|
identical packages on a system,
|
||||||
|
even when running identical workloads.
|
||||||
|
It is unclear whether this is due to
|
||||||
|
process variation or calibration
|
||||||
|
|
17
presenter_demo/slide_16_rapl_papi
Normal file
17
presenter_demo/slide_16_rapl_papi
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
40COL
|
||||||
|
%c%RAPL PAPI Interface
|
||||||
|
|
||||||
|
* Access to RAPL data requires reading
|
||||||
|
a CPU MSR.
|
||||||
|
This requires OS support
|
||||||
|
|
||||||
|
* Linux currently has no driver and
|
||||||
|
likely won't for the near future
|
||||||
|
|
||||||
|
* Linux does support an 'MSR' driver.
|
||||||
|
Given proper read permissions, MSRs
|
||||||
|
can be accessed via /dev/cpu/*/msr
|
||||||
|
|
||||||
|
* PAPI uses the 'MSR' driver to gather
|
||||||
|
RAPL values
|
||||||
|
|
3
presenter_demo/slide_17_rapl_power
Normal file
3
presenter_demo/slide_17_rapl_power
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
40COL
|
||||||
|
BLAH
|
||||||
|
|
3
presenter_demo/slide_18_rapl_energy
Normal file
3
presenter_demo/slide_18_rapl_energy
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
40COL
|
||||||
|
BLAH
|
||||||
|
|
13
presenter_demo/slide_19_nvml
Normal file
13
presenter_demo/slide_19_nvml
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
40COL
|
||||||
|
%c%NVML
|
||||||
|
|
||||||
|
* Recent NVIDIA GPUs support reading
|
||||||
|
power via the NVIDIA Management
|
||||||
|
Library (NVML)
|
||||||
|
|
||||||
|
* On Fermi C2075 GPUs it has milliwatt
|
||||||
|
resolution, +/- 5W accuracy, and is
|
||||||
|
updated at roughly 60Hz
|
||||||
|
|
||||||
|
* The power reported is that for the
|
||||||
|
entire board, GPU and memory
|
@ -287,3 +287,4 @@ START
|
|||||||
2.66 122.18
|
2.66 122.18
|
||||||
2.67 117.74
|
2.67 117.74
|
||||||
STOP
|
STOP
|
||||||
|
%c%MAGMA LU 10,000, Nvidia Fermi C2075
|
6
presenter_demo/slide_21_future_papi
Normal file
6
presenter_demo/slide_21_future_papi
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
40COL
|
||||||
|
%c%Near-future PAPI Components
|
||||||
|
|
||||||
|
%c%These components do not exist yet,
|
||||||
|
%c%but support for them should be
|
||||||
|
%c%straightforward.
|
14
presenter_demo/slide_22_amd
Normal file
14
presenter_demo/slide_22_amd
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
40COL
|
||||||
|
%c%AMD Application Power Management
|
||||||
|
|
||||||
|
* Recent AMD Family 15h processors
|
||||||
|
report 'Current Power In Watts'
|
||||||
|
via Processor Power in the TDP MSR
|
||||||
|
|
||||||
|
* Support for this can be provided
|
||||||
|
similar to RAPL
|
||||||
|
|
||||||
|
* We just need an Interlagos system
|
||||||
|
where someone gives us read
|
||||||
|
permissions to /dev/cpu/*/msr
|
||||||
|
|
12
presenter_demo/slide_23_powermon
Normal file
12
presenter_demo/slide_23_powermon
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
40COL
|
||||||
|
%c%PowerMon 2
|
||||||
|
|
||||||
|
* PowerMon 2: a custom board from RENCI
|
||||||
|
|
||||||
|
* Plugs in-line with ATX power supply
|
||||||
|
|
||||||
|
* Reports results over USB
|
||||||
|
|
||||||
|
* 8 channels, 1kHz sample rate
|
||||||
|
|
||||||
|
* We have hardware; still debugging
|
14
presenter_demo/slide_24_user
Normal file
14
presenter_demo/slide_24_user
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
40COL
|
||||||
|
%c%PAPI-based Power Models
|
||||||
|
|
||||||
|
* There's a lot of related work on
|
||||||
|
estimating energy/power using
|
||||||
|
performance counters
|
||||||
|
|
||||||
|
* PAPI user-defined events can be
|
||||||
|
used to create power models using
|
||||||
|
existing events
|
||||||
|
|
||||||
|
* Previous work (McKee et al.) shows
|
||||||
|
accuracy to within 10%%
|
||||||
|
|
7
presenter_demo/slide_25_measuring
Normal file
7
presenter_demo/slide_25_measuring
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
40COL
|
||||||
|
%c%Measuring using PAPI
|
||||||
|
|
||||||
|
* Measuring Energy/Power with PAPI is
|
||||||
|
done the same as measuring any other
|
||||||
|
event
|
||||||
|
|
19
presenter_demo/slide_26_events
Normal file
19
presenter_demo/slide_26_events
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
40COL
|
||||||
|
%c%Listing Events
|
||||||
|
|
||||||
|
> papi_native_avail
|
||||||
|
...
|
||||||
|
====================================
|
||||||
|
Events in Component: linux-rapl
|
||||||
|
====================================
|
||||||
|
------------------------------------
|
||||||
|
| PACKAGE_ENERGY:PACKAGE0
|
||||||
|
| Energy used by chip package 0
|
||||||
|
------------------------------------
|
||||||
|
| PACKAGE_ENERGY:PACKAGE1
|
||||||
|
| Energy used by chip package 1
|
||||||
|
------------------------------------
|
||||||
|
| DRAM_ENERGY:PACKAGE0
|
||||||
|
| Energy used by DRAM on package 0
|
||||||
|
------------------------------------
|
||||||
|
...
|
4
presenter_demo/slide_27_multiple
Normal file
4
presenter_demo/slide_27_multiple
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
40COL
|
||||||
|
%c%Measuring Multiple at Once
|
||||||
|
|
||||||
|
|
7
presenter_demo/slide_28_questions
Normal file
7
presenter_demo/slide_28_questions
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
40COL
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
%c%Questions before Digressing?
|
10
presenter_demo/slide_29_appleIIe
Normal file
10
presenter_demo/slide_29_appleIIe
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
40COL
|
||||||
|
%c%Apple IIe
|
||||||
|
|
||||||
|
* Apple II released in 1977
|
||||||
|
* Apple IIe Platinum released in 1987
|
||||||
|
* 1MHz 65C02 Processor, 128kB RAM
|
||||||
|
* 280x192, 6-color graphics
|
||||||
|
(IIe can do DoubleHiRes)
|
||||||
|
* Power: 18 - 20W
|
||||||
|
|
12
presenter_demo/slide_30_linpack
Normal file
12
presenter_demo/slide_30_linpack
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
40COL
|
||||||
|
%c%Linpack Results
|
||||||
|
|
||||||
|
10x10 Matrix-matrix multiply
|
||||||
|
START
|
||||||
|
STOP
|
||||||
|
HOW MANY SECONDS? 15
|
||||||
|
133.333333 FLOP/s
|
||||||
|
|
||||||
|
Yes I know using BASIC is unfair
|
||||||
|
But I am too lazy to code up a
|
||||||
|
6502 FP implementation in assembler
|
7
presenter_demo/slide_31_questions
Normal file
7
presenter_demo/slide_31_questions
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
40COL
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
%c%QUESTIONS?
|
||||||
|
|
@ -1,2 +0,0 @@
|
|||||||
HGR
|
|
||||||
PLASMA_DPOSV.HGR
|
|
@ -1,3 +0,0 @@
|
|||||||
40COL
|
|
||||||
RAPL CONTINUE
|
|
||||||
* Is freally awesome
|
|
@ -1,3 +0,0 @@
|
|||||||
40COL
|
|
||||||
RAPL LAST
|
|
||||||
* Is really awesome
|
|
@ -1,4 +0,0 @@
|
|||||||
40COL
|
|
||||||
#c#RAPL
|
|
||||||
|
|
||||||
* Is really awesome
|
|
@ -1,3 +0,0 @@
|
|||||||
40COL
|
|
||||||
RAPL MORE
|
|
||||||
* Is really awesome
|
|
Loading…
x
Reference in New Issue
Block a user