From c7ab3d4075a1f48cf51d95041211129371de9b23 Mon Sep 17 00:00:00 2001 From: Thomas Harte Date: Wed, 18 Nov 2020 17:32:11 -0500 Subject: [PATCH] Reduces cost of bookending video data. --- Outputs/ScanTarget.hpp | 1 + Outputs/ScanTargets/BufferingScanTarget.cpp | 27 ++++++++++++--------- Outputs/ScanTargets/BufferingScanTarget.hpp | 5 ++++ 3 files changed, 21 insertions(+), 12 deletions(-) diff --git a/Outputs/ScanTarget.hpp b/Outputs/ScanTarget.hpp index 408092bae..dc4074d95 100644 --- a/Outputs/ScanTarget.hpp +++ b/Outputs/ScanTarget.hpp @@ -94,6 +94,7 @@ enum class InputDataType { }; /// @returns the number of bytes per sample for data of type @c data_type. +/// Guaranteed to be 1, 2 or 4 for valid data types. constexpr inline size_t size_for_data_type(InputDataType data_type) { switch(data_type) { case InputDataType::Luminance1: diff --git a/Outputs/ScanTargets/BufferingScanTarget.cpp b/Outputs/ScanTargets/BufferingScanTarget.cpp index c2d0286aa..bd111574a 100644 --- a/Outputs/ScanTargets/BufferingScanTarget.cpp +++ b/Outputs/ScanTargets/BufferingScanTarget.cpp @@ -89,28 +89,21 @@ uint8_t *BufferingScanTarget::begin_data(size_t required_length, size_t required // write_pointers_.write_area points to the first pixel the client is expected to draw to. } -void BufferingScanTarget::end_data(size_t actual_length) { +template <typename DataUnit> void BufferingScanTarget::end_data(size_t actual_length) { // Acquire the producer lock. std::lock_guard lock_guard(producer_mutex_); // Do nothing if no data write is actually ongoing. if(allocation_has_failed_ || !data_is_allocated_) return; - // Bookend the start of the new data, to safeguard for precision errors in sampling. - memcpy( - &write_area_[size_t(write_pointers_.write_area - 1) * data_type_size_], - &write_area_[size_t(write_pointers_.write_area) * data_type_size_], - data_type_size_); + // Bookend the start and end of the new data, to safeguard for precision errors in sampling. 
+ DataUnit *const sized_write_area = &reinterpret_cast<DataUnit *>(write_area_)[write_pointers_.write_area]; + sized_write_area[-1] = sized_write_area[0]; + sized_write_area[actual_length] = sized_write_area[actual_length - 1]; // Advance to the end of the current run. write_pointers_.write_area += actual_length + 1; - // Also bookend the end. - memcpy( - &write_area_[size_t(write_pointers_.write_area - 1) * data_type_size_], - &write_area_[size_t(write_pointers_.write_area - 2) * data_type_size_], - data_type_size_); - // The write area was allocated in the knowledge that there's sufficient // distance left on the current line, but there's a risk of exactly filling // the final line, in which case this should wrap back to 0. @@ -120,6 +113,16 @@ void BufferingScanTarget::end_data(size_t actual_length) { data_is_allocated_ = false; } +void BufferingScanTarget::end_data(size_t actual_length) { + // Just dispatch appropriately. + switch(data_type_size_) { + default: break; + case 1: end_data<uint8_t>(actual_length); break; + case 2: end_data<uint16_t>(actual_length); break; + case 4: end_data<uint32_t>(actual_length); break; + } +} + // MARK: - Producer; scans. Outputs::Display::ScanTarget::Scan *BufferingScanTarget::begin_scan() { diff --git a/Outputs/ScanTargets/BufferingScanTarget.hpp b/Outputs/ScanTargets/BufferingScanTarget.hpp index aa74e6579..4b41d0a8b 100644 --- a/Outputs/ScanTargets/BufferingScanTarget.hpp +++ b/Outputs/ScanTargets/BufferingScanTarget.hpp @@ -255,6 +255,11 @@ class BufferingScanTarget: public Outputs::Display::ScanTarget { Modals modals_; bool modals_are_dirty_ = false; + // Provides a per-data size implementation of end_data; a previous + // implementation used blind memcpy and that turned into something + // of a profiling hot spot. + template <typename DataUnit> void end_data(size_t actual_length); + #ifndef NDEBUG // Debug features; these amount to API validation. bool scan_is_ongoing_ = false;