diff --git a/MacGLide/OpenGLide/Framebuffer.cpp b/MacGLide/OpenGLide/Framebuffer.cpp index 849267b..ead2b72 100644 --- a/MacGLide/OpenGLide/Framebuffer.cpp +++ b/MacGLide/OpenGLide/Framebuffer.cpp @@ -1 +1 @@ -//************************************************************** //* OpenGLide - Glide to OpenGL Wrapper //* http://openglide.sourceforge.net //* //* framebuffer emulation //* //* OpenGLide is OpenSource under LGPL license //* Mac version and additional features by Jens-Olaf Hemprich //************************************************************** #include "Framebuffer.h" #include "Glide.h" #include "GlideApplication.h" #include "GlideSettings.h" #include "GLRender.h" #include "GLRenderUpdateState.h" #include "GLColorAlphaCombineEnvTables.h" // check if tile needs to be displayed #define CHECK_RENDER_TILE // Display small dots at opposite corners of rendered framebuffer tiles //#define DEBUG_TILE_RENDERING Framebuffer::Framebuffer() : m_x_step_start(0) , m_y_step_start(0) , m_x_step_start_opaque(0) , m_y_step_start_opaque(0) , m_width(0) , m_height(0) , m_framebuffer(NULL) , m_texbuffer(NULL) , m_origin(GR_ORIGIN_UPPER_LEFT) , m_glInternalFormat(-1) , m_glFormat(-1) , m_glType(-1) , m_glDepth(1.0f) , m_format_valid(false) , m_use_client_storage(false) , m_useRectangleARB(false) , m_must_clear_buffer(true) , m_custom_tilesizes(NULL) { // Don't set the checksum to 0, as this would cause white screen in Carmageddon // because fully black tile also have a 0 checksum and no texture data would be // download at all (and the tile would be rendered as if TEXTURE_2D was disabled) memset(m_tileChecksums, 0xff, sizeof(vector unsigned long) * m_tileCount); } Framebuffer::~Framebuffer() { } bool Framebuffer::initialise_buffers(BufferStruct* framebuffer, BufferStruct* texbuffer, FxU32 width, FxU32 height, const tilesize* tilesizetable) { #ifdef OGL_FRAMEBUFFER GlideMsg( "GlideFrameBuffer::initialise_buffers(---, ---, %d, %d, ---)\n", width, height); #endif 
m_custom_tilesizes = tilesizetable; return initialise_buffers(framebuffer, texbuffer, width, height, 0, 0); } bool Framebuffer::initialise_buffers(BufferStruct* framebuffer, BufferStruct* texbuffer, FxU32 width, FxU32 height, FxU32 x_tile, FxU32 y_tile) { #ifdef OGL_FRAMEBUFFER GlideMsg( "Framebuffer::initialise_buffers(---, ---, %d, %d, %d, %d)\n", width, height, x_tile, y_tile); #endif m_framebuffer = framebuffer; m_texbuffer = texbuffer; m_framebuffer->WriteMode = m_texbuffer->WriteMode = GR_LFBWRITEMODE_UNUSED; m_width = width; m_height = height; // find out largest texture size GLint tile_size; glGetIntegerv(GL_MAX_TEXTURE_SIZE, &tile_size); m_x_step_start_opaque = tile_size; m_y_step_start_opaque = tile_size; m_x_step_start = min(tile_size, x_tile); m_y_step_start = min(tile_size, y_tile); m_x_step_start = max(16, m_x_step_start); m_y_step_start = max(16, m_y_step_start); m_useRectangleARB = InternalConfig.ARB_texture_rectangle && InternalConfig.EXT_compiled_vertex_array; // The texture priority is set to minimun because // frame buffer textures are never used a second time // @todo: This is not true anymore in all cases // because of the altivec checksum feature const GLfloat zero = 0.0f; glGenTextures(m_tileCount, &m_textureNames[0]); glPrioritizeTextures(m_tileCount, &m_textureNames[0], &zero); for(int i = 0; i < m_tileCount; i++) { const GLenum textureTarget = m_useRectangleARB ? GL_TEXTURE_RECTANGLE_ARB : GL_TEXTURE_2D; glBindTexture(textureTarget, m_textureNames[i]); if (m_useRectangleARB) { // This will not probably not work right now, because // we're using RGBA (the OS9 prefered texture format) // @todo: use ABGR??? 
in order to avoid byte swizzling // glTextureRangeAPPLE(GLenum target, GLsizei length, GLvoid *pointer); glTexParameteri(textureTarget, GL_TEXTURE_STORAGE_HINT_APPLE, GL_STORAGE_CACHED_APPLE); glReportError(); } glTexParameteri(textureTarget, GL_TEXTURE_MIN_FILTER, GL_NEAREST); glTexParameteri(textureTarget, GL_TEXTURE_MAG_FILTER, GL_NEAREST); glTexParameteri(textureTarget, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); glTexParameteri(textureTarget, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); } // If a game has its own tilesize table, use // the largest tiles for opaque renderings GLint y_step = y_tile == 0 ? m_y_step_start_opaque : m_y_step_start; // init default/opaque tilesize table int w = 0; for(FxU32 y = 0; y < m_height && w < MaxTiles ; y += y_step, w++) { while (m_height - y < y_step) { y_step = y_step >> 1; } m_tilesizes[w].y = y_step; GLint x_step = x_tile == 0 ? m_x_step_start_opaque : m_x_step_start; int v = 0; for(FxU32 x = 0; x < m_width && v < MaxTiles; x += x_step, v++ ) { while (m_width - x < x_step) { x_step = x_step >> 1; } m_tilesizes[w].x[v] = x_step; } } // Build compiled vertex arrays if (InternalConfig.EXT_compiled_vertex_array) { // Store various render buffers indices m_tilesizesVertexArrayIndex = OGLRender.FrameBufferStartIndex; m_tilesizesCount = buildVertexArrays(&m_tilesizes[0], m_tilesizesVertexArrayIndex); if (m_custom_tilesizes) { m_customtilesizesVertexArrayIndex = m_tilesizesVertexArrayIndex + m_tilesizesCount * 2; m_customtilesizesCount = buildVertexArrays(m_custom_tilesizes, m_customtilesizesVertexArrayIndex); } } return m_width > 0 && m_height > 0 && m_x_step_start > 0 && m_y_step_start > 0 && m_format_valid; } void Framebuffer::free_buffers() { #ifdef OGL_FRAMEBUFFER GlideMsg( "Framebuffer::free_buffers()\n"); #endif if (m_tilesizes) FreeObject(m_tilesizes); glDeleteTextures(m_tileCount, &m_textureNames[0]); } void Framebuffer::initialise_format(GrLfbWriteMode_t writemode) { #if defined(OGL_PART_DONE) || defined(OGL_FRAMEBUFFER) 
GlideMsg("Framebuffer::initialise_format(0x%x)\n", writemode); #endif // Enlarge buffer? if (writemode >= GR_LFBWRITEMODE_888 && (m_framebuffer->WriteMode < GR_LFBWRITEMODE_888 || m_framebuffer->WriteMode == GR_LFBWRITEMODE_UNUSED) && m_framebuffer->Address) { // Delete existing buffer FreeFrameBuffer(m_framebuffer->Address); m_framebuffer->Address = NULL; m_texbuffer->Address = NULL; } // Allocate 32-bit buffer (16bit buffer has been allocated in grSstWinOpen() if (m_framebuffer->Address == NULL) { unsigned long openglpixels = OpenGL.WindowWidth * OpenGL.WindowHeight; // Framebuffer can be written to with 16bit or 32bit data unsigned long buffertypesize = (writemode >= GR_LFBWRITEMODE_888) ? sizeof(FxU32) : sizeof(FxU16); Glide.FrameBuffer.Address = (FxU16*) AllocFrameBuffer(Glide.WindowTotalPixels * buffertypesize + openglpixels * sizeof(FxU32), 1); Glide.TempBuffer.Address = &Glide.FrameBuffer.Address[Glide.WindowTotalPixels * buffertypesize >> 1]; memset( Glide.FrameBuffer.Address, 0, Glide.WindowTotalPixels * buffertypesize); memset( Glide.TempBuffer.Address, 0, openglpixels * sizeof(FxU32)); } m_framebuffer->WriteMode = writemode; m_glInternalFormat = 4; m_glFormat = GL_RGBA; m_glType = GL_UNSIGNED_BYTE; FxU16 chromakeyvalue; switch (writemode) { case GR_LFBWRITEMODE_565: chromakeyvalue = s_GlideApplication.GetType() == GlideApplication::Carmageddon ? 
0x1f1f : 0x07ff; m_format_valid = true; break; case GR_LFBWRITEMODE_1555: chromakeyvalue = 0x03ff; m_format_valid = true; break; case GR_LFBWRITEMODE_888: chromakeyvalue = 0x7ffdfeff; m_format_valid = true; break; default: chromakeyvalue = 0x0; m_format_valid = false; break; } // When the chromakeyvalue changes, the buffer has to be cleared if (chromakeyvalue != m_ChromaKey.Scalar) { SetChromaKeyValue(chromakeyvalue); m_must_clear_buffer = true; } } bool Framebuffer::begin_write() { #ifdef OGL_FRAMEBUFFER GlideMsg("Framebuffer::begin_write()\n"); #endif if (m_must_clear_buffer) { Clear(); m_must_clear_buffer = false; } return true; } void Framebuffer::Clear() { #ifdef OGL_FRAMEBUFFER GlideMsg( "Framebuffer::Clear()\n"); #endif const FxU16 chromakey = GetChromaKeyValue(); const FxU32 count = m_width * m_height ; FxU16* framebuffer = m_framebuffer->Address; for ( int i = 0; i < count; i++) { framebuffer[i] = chromakey; } } bool Framebuffer::end_write(FxU32 alpha, GLfloat depth, bool pixelpipeline) { #ifdef OGL_FRAMEBUFFER GlideMsg("Framebuffer::end_write(%d, %f, %d)\n", alpha, depth, pixelpipeline); #endif m_glDepth = depth; #ifdef __ALTIVEC__ for(int i = 0; i < 4; i++) { (&m_glAlpha.Scalar)[i] = alpha; } #else m_glAlpha.Scalar = alpha; #endif // if all pixels are invisible, nothing must be rendered. // The pixel conversion functions assume alpha is != 0 in order // to determine if a tile contains any pixels to be rendered. if (m_glAlpha.Scalar == 0) return false; set_gl_state(pixelpipeline); if (InternalConfig.EXT_compiled_vertex_array) { if (m_custom_tilesizes) { drawCompiledVertexArrays(m_custom_tilesizes, m_customtilesizesVertexArrayIndex, m_customtilesizesCount, pixelpipeline); } else { drawCompiledVertexArrays(m_tilesizes, m_tilesizesVertexArrayIndex, m_tilesizesCount, pixelpipeline); } } else { const tilesize* tilesizes = m_custom_tilesizes ? 
m_custom_tilesizes : m_tilesizes; draw(tilesizes, pixelpipeline); } restore_gl_state(pixelpipeline); return true; } bool Framebuffer::end_write(FxU32 alpha) { #ifdef OGL_DONE GlideMsg("Framebuffer::end_write(%d)\n", alpha); #endif // draw frame buffer // @todo: Depth should OpenGL.ZNear, but that breaks overlays in Myth FxBool result = end_write(alpha, 0.0, false); return result; } bool Framebuffer::end_write() { #ifdef OGL_FRAMEBUFFER GlideMsg( "Framebuffer::end_write( )\n" ); #endif return end_write(0x000000ff); } bool Framebuffer::end_write_opaque() { #ifdef OGL_FRAMEBUFFER GlideMsg("Framebuffer::end_write_opaque()\n"); #endif // @todo: Depth should OpenGL.ZNear, but that breaks overlays in Myth return end_write(0x000000ff, 0.0, false); } void Framebuffer::set_gl_state(bool pixelpipeline) { #ifdef OGL_FRAMEBUFFER GlideMsg( "Framebuffer::set_gl_state(%d)\n", pixelpipeline); #endif glReportErrors("Framebuffer::set_gl_state"); VERIFY_ACTIVE_TEXTURE_UNIT(OpenGL.ColorAlphaUnit1); // Disable the cull mode glDisable(GL_CULL_FACE); // Disable clip volume hint manually to avoid recursion if (InternalConfig.EXT_clip_volume_hint && OpenGL.ClipVerticesEnabledState) { glHint(GL_CLIP_VOLUME_CLIPPING_HINT_EXT, GL_FASTEST); } if (pixelpipeline) { if (OpenGL.ColorAlphaUnit2) { // Pixelpipeline support for env cobine based rendering: // Framebuffer pixels must be routed through the coloralpha unit // as if they were produced by the vertex iterators without an // additional GL texture unit -> source must be changed accordingly m_bRestoreColorCombine = false; if (Glide.State.ColorCombineLocal == GR_COMBINE_LOCAL_ITERATED) { Glide.State.ColorCombineLocal = GR_COMBINE_LOCAL_PIXELPIPELINE; m_bRestoreColorCombine = true; } if (Glide.State.ColorCombineOther == GR_COMBINE_OTHER_ITERATED) { Glide.State.ColorCombineOther = GR_COMBINE_OTHER_PIXELPIPELINE; m_bRestoreColorCombine = true; } if (m_bRestoreColorCombine) SetColorCombineState(); m_bRestoreAlphaCombine = false; if 
(Glide.State.AlphaLocal == GR_COMBINE_LOCAL_ITERATED) { Glide.State.AlphaLocal = GR_COMBINE_LOCAL_PIXELPIPELINE; m_bRestoreAlphaCombine = true; } if (Glide.State.AlphaOther == GR_COMBINE_OTHER_ITERATED) { Glide.State.AlphaOther = GR_COMBINE_OTHER_PIXELPIPELINE; m_bRestoreAlphaCombine = true; } if (m_bRestoreAlphaCombine) SetAlphaCombineState(); // Update the opengl state for the pixel pipeline RenderUpdateState(); // If the write mode doesn't provide alpha then m_glAlpha is used // as the constant alpha value, and we can use the alpha test // to mask out chromakey pixels switch (m_framebuffer->WriteMode) { case GR_LFBWRITEMODE_565: case GR_LFBWRITEMODE_888: glEnable(GL_ALPHA_TEST); const GLenum alphaTestFunction = GL_EQUAL; const GLfloat alphaTestReferenceValue= m_glAlpha.Scalar * D1OVER255; OpenGL.AlphaTestFunction = alphaTestFunction; OpenGL.AlphaReferenceValue = alphaTestReferenceValue; glAlphaFunc(alphaTestFunction, alphaTestReferenceValue); glReportError(); break; } } // GL_RECTANGLE_ARB overrides GL_TEXTURE_2D if (m_useRectangleARB) { const GLenum textureTarget = GL_TEXTURE_RECTANGLE_ARB; const bool enableColoralphaTextureUnit1 = OpenGL.ColorAlphaUnitColorEnabledState[0] || OpenGL.ColorAlphaUnitAlphaEnabledState[0]; if (enableColoralphaTextureUnit1) { glEnable(textureTarget); glReportError(); } if (OpenGL.ColorAlphaUnit2) { const bool enableColoralphaTextureUnit2 = OpenGL.ColorAlphaUnitColorEnabledState[1] || OpenGL.ColorAlphaUnitAlphaEnabledState[1]; if (enableColoralphaTextureUnit2) { glActiveTextureARB(OpenGL.ColorAlphaUnit2); glReportError(); glEnable(textureTarget); glReportError(); glActiveTextureARB(OpenGL.ColorAlphaUnit1); glReportError(); } } // The client texture state is already setup correctly since we just // have to adjust the texture rectangle state // (the texture_2d state is not changed when the pixelpipeline mode is active) } // Set the origin with clipping glMatrixMode(GL_PROJECTION); glLoadIdentity(); if (m_origin == GR_ORIGIN_LOWER_LEFT) 
{ glOrtho(Glide.State.ClipMinX, Glide.State.ClipMaxX, Glide.State.ClipMinY, Glide.State.ClipMaxY, OpenGL.ZNear, OpenGL.ZFar); glViewport(OpenGL.OriginX + OpenGL.ClipMinX, OpenGL.OriginY + OpenGL.ClipMinY, OpenGL.ClipMaxX - OpenGL.ClipMinX, OpenGL.ClipMaxY - OpenGL.ClipMinY); } else { glOrtho(Glide.State.ClipMinX, Glide.State.ClipMaxX, Glide.State.ClipMaxY, Glide.State.ClipMinY, OpenGL.ZNear, OpenGL.ZFar); glViewport(OpenGL.OriginX + OpenGL.ClipMinX, OpenGL.OriginY + OpenGL.WindowHeight - OpenGL.ClipMaxY, OpenGL.ClipMaxX - OpenGL.ClipMinX, OpenGL.ClipMaxY - OpenGL.ClipMinY); } // The scissor rectangle is not reset, because scissor mode // is only enabled when clearing the buffer glMatrixMode(GL_MODELVIEW); glReportError(); } else { // disable blend glDisable(GL_BLEND); // disable depth buffer glDepthMask(false); // Enable colormask glColorMask( true, true, true, false); // Needed for displaying in-game menus if (Glide.State.DepthBufferMode != GR_DEPTHBUFFER_DISABLE) { glDisable(GL_DEPTH_TEST); } glEnable(GL_ALPHA_TEST); // Update state as we're calling update triggers on restore const GLenum alphaTestFunction = GL_GREATER; const GLfloat alphaTestReferenceValue= 0.0; OpenGL.AlphaTestFunction = alphaTestFunction; OpenGL.AlphaReferenceValue = alphaTestReferenceValue; glAlphaFunc(alphaTestFunction, alphaTestReferenceValue); glReportError(); if (InternalConfig.EXT_secondary_color) { glDisable(GL_COLOR_SUM_EXT); glReportError(); } // Reset the clipping window // and set the origin glMatrixMode(GL_PROJECTION); glLoadIdentity(); if (m_origin == GR_ORIGIN_LOWER_LEFT) { glOrtho(0, Glide.WindowWidth, 0, Glide.WindowHeight, OpenGL.ZNear, OpenGL.ZFar); glViewport(OpenGL.OriginX, OpenGL.OriginY, OpenGL.WindowWidth, OpenGL.WindowHeight); } else { glOrtho(0, Glide.WindowWidth, Glide.WindowHeight, 0, OpenGL.ZNear, OpenGL.ZFar); glViewport(OpenGL.OriginX, OpenGL.OriginY, OpenGL.WindowWidth, OpenGL.WindowHeight); } // The scissor rectangle is not changed, because scissor mode // is 
only enabled when clearing the buffer glMatrixMode(GL_MODELVIEW); glReportError(); // Disable fog bool disable_fog_texture_unit = OpenGL.FogTextureUnit; if (disable_fog_texture_unit) { glActiveTextureARB(OpenGL.FogTextureUnit); if (InternalConfig.EXT_compiled_vertex_array) { glClientActiveTextureARB(OpenGL.FogTextureUnit); glDisableClientState(GL_TEXTURE_COORD_ARRAY); glTexCoordPointer(4, GL_FLOAT, 0, NULL); } glDisable(GL_TEXTURE_2D); } if (OpenGL.Fog && InternalConfig.FogMode != OpenGLideFogEmulation_None && InternalConfig.FogMode != OpenGLideFogEmulation_EnvCombine) { glDisable(GL_FOG); } glReportError(); // enable framebuffer texture unit if (OpenGL.ColorAlphaUnit2) { bool disable_coloralpha_texture_unit_2 = OpenGL.ColorAlphaUnitColorEnabledState[1] || OpenGL.ColorAlphaUnitAlphaEnabledState[1]; if (disable_coloralpha_texture_unit_2) { glActiveTextureARB(OpenGL.ColorAlphaUnit2); if (InternalConfig.EXT_compiled_vertex_array) { glClientActiveTextureARB(OpenGL.ColorAlphaUnit2); glDisableClientState(GL_TEXTURE_COORD_ARRAY); // On MacOS9 (Classic?) 
the texcoord pointer needs to be reset // to the default value when glLockArrays/glUnlockArrays is used glTexCoordPointer( 4, GL_FLOAT, 0, NULL ); } glDisable(GL_TEXTURE_2D); } if (disable_fog_texture_unit || disable_coloralpha_texture_unit_2) { glActiveTextureARB(OpenGL.ColorAlphaUnit1); if (InternalConfig.EXT_compiled_vertex_array) { glClientActiveTextureARB(OpenGL.ColorAlphaUnit1); } } if (!OpenGL.ColorAlphaUnitColorEnabledState[0] && !OpenGL.ColorAlphaUnitAlphaEnabledState[0]) { glEnable(GL_TEXTURE_2D); if (InternalConfig.EXT_compiled_vertex_array) { glEnableClientState(GL_TEXTURE_COORD_ARRAY); glTexCoordPointer(4, GL_FLOAT, 0, &OGLRender.TTexture[0]); } } } else { if (disable_fog_texture_unit) { glActiveTextureARB(OpenGL.ColorAlphaUnit1); if (InternalConfig.EXT_compiled_vertex_array) { glClientActiveTextureARB(OpenGL.ColorAlphaUnit1); } } if (OpenGL.Texture == false) { // GL_RECTANGLE_ARB overrides GL_RECTANGLE_2D if (!m_useRectangleARB) glEnable(GL_TEXTURE_2D); if (InternalConfig.EXT_compiled_vertex_array) { glEnableClientState(GL_TEXTURE_COORD_ARRAY); glTexCoordPointer(4, GL_FLOAT, 0, &OGLRender.TTexture[0]); } } } if (m_useRectangleARB) { glEnable(GL_TEXTURE_RECTANGLE_ARB); glReportError(); } glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_MODULATE); glReportError(); } } void Framebuffer::restore_gl_state(bool pixelpipeline) { #ifdef OGL_FRAMEBUFFER GlideMsg( "Framebuffer::restore_gl_state(%d)\n", pixelpipeline); #endif glReportErrors("Framebuffer::restore_gl_state"); // Restore the cull mode switch (Glide.State.CullMode) { case GR_CULL_DISABLE: break; case GR_CULL_NEGATIVE: case GR_CULL_POSITIVE: glEnable(GL_CULL_FACE); break; } if (InternalConfig.EXT_clip_volume_hint && OpenGL.ClipVerticesEnabledState) { glHint(GL_CLIP_VOLUME_CLIPPING_HINT_EXT, GL_NICEST); } // Restore the clipping window glMatrixMode(GL_PROJECTION); glLoadIdentity(); if ( Glide.State.OriginInformation == GR_ORIGIN_LOWER_LEFT ) { glOrtho(Glide.State.ClipMinX, Glide.State.ClipMaxX, 
Glide.State.ClipMinY, Glide.State.ClipMaxY, OpenGL.ZNear, OpenGL.ZFar); glViewport(OpenGL.OriginX + OpenGL.ClipMinX, OpenGL.OriginY + OpenGL.ClipMinY, OpenGL.ClipMaxX - OpenGL.ClipMinX, OpenGL.ClipMaxY - OpenGL.ClipMinY); } else { glOrtho(Glide.State.ClipMinX, Glide.State.ClipMaxX, Glide.State.ClipMaxY, Glide.State.ClipMinY, OpenGL.ZNear, OpenGL.ZFar); glViewport(OpenGL.OriginX + OpenGL.ClipMinX, OpenGL.OriginY + OpenGL.WindowHeight - OpenGL.ClipMaxY, OpenGL.ClipMaxX - OpenGL.ClipMinX, OpenGL.ClipMaxY - OpenGL.ClipMinY); } // The scissor rectangle is not reset, because scissor mode // is only enabled when clearing the buffer glMatrixMode( GL_MODELVIEW ); glReportError(); if (pixelpipeline) { if (OpenGL.ColorAlphaUnit2) { // restore current values if (m_bRestoreColorCombine) { if (Glide.State.ColorCombineLocal == GR_COMBINE_LOCAL_PIXELPIPELINE) Glide.State.ColorCombineLocal = GR_COMBINE_LOCAL_ITERATED; if (Glide.State.ColorCombineOther == GR_COMBINE_OTHER_PIXELPIPELINE) Glide.State.ColorCombineOther = GR_COMBINE_OTHER_ITERATED; SetColorCombineState(); } if(m_bRestoreAlphaCombine) { if (Glide.State.AlphaLocal == GR_COMBINE_LOCAL_PIXELPIPELINE) Glide.State.AlphaLocal = GR_COMBINE_LOCAL_ITERATED; if (Glide.State.AlphaOther == GR_COMBINE_OTHER_PIXELPIPELINE) Glide.State.AlphaOther = GR_COMBINE_LOCAL_ITERATED; SetAlphaCombineState(); } } switch (m_framebuffer->WriteMode) { case GR_LFBWRITEMODE_565: case GR_LFBWRITEMODE_888: SetChromaKeyAndAlphaState(); break; } // GL_RECTANGLE_ARB overrides GL_TEXTURE_2D if (m_useRectangleARB) { const GLenum textureTarget = GL_TEXTURE_RECTANGLE_ARB; const bool disableColoralphaTextureUnit1 = !(OpenGL.ColorAlphaUnitColorEnabledState[0] || OpenGL.ColorAlphaUnitAlphaEnabledState[0]); if (disableColoralphaTextureUnit1) { glDisable(textureTarget); glReportError(); } if (OpenGL.ColorAlphaUnit2) { const bool disableColoralphaTextureUnit2 = !(OpenGL.ColorAlphaUnitColorEnabledState[1] || OpenGL.ColorAlphaUnitAlphaEnabledState[1]); if 
(disableColoralphaTextureUnit2) { glActiveTextureARB(OpenGL.ColorAlphaUnit2); glReportError(); glDisable(textureTarget); glReportError(); glActiveTextureARB(OpenGL.ColorAlphaUnit1); glReportError(); } } // The client texture state is already setup correctly since we just // have to adjust the texture rectangle state to the texture_2d state } } else { // restore previous state if (OpenGL.DepthBufferWritting ) { glDepthMask( true ); } if (Glide.State.DepthBufferMode != GR_DEPTHBUFFER_DISABLE) { glEnable( GL_DEPTH_TEST ); } // Restore colormask bool rgb = Glide.State.ColorMask; glColorMask(rgb, rgb, rgb, Glide.State.AlphaMask); if ( OpenGL.Blend ) { glEnable( GL_BLEND ); } if ( InternalConfig.EXT_secondary_color ) { glEnable( GL_COLOR_SUM_EXT ); } glReportError(); // Enable fog? bool enable_fog_texture_unit = OpenGL.FogTextureUnit && ((OpenGL.Fog && InternalConfig.FogMode == OpenGLideFogEmulation_EnvCombine) || Glide.State.ColorCombineInvert || Glide.State.AlphaInvert); if (enable_fog_texture_unit) { glActiveTextureARB(OpenGL.FogTextureUnit); glEnable(GL_TEXTURE_2D); // We're not using glDrawArrays to render the frame buffer, // but without disabling the client state the next texture drawn // by RenderDrawTriangles would get the wrong coordinates. 
// Can be observed in Carmageddon: The sky texture is rendered "too high" if (InternalConfig.EXT_compiled_vertex_array) { glClientActiveTextureARB(OpenGL.FogTextureUnit); glEnableClientState(GL_TEXTURE_COORD_ARRAY); glTexCoordPointer(1, GL_FLOAT, 0, &OGLRender.TFog[0]); } } if (OpenGL.Fog && InternalConfig.FogMode != OpenGLideFogEmulation_None && InternalConfig.FogMode != OpenGLideFogEmulation_EnvCombine) { glEnable(GL_FOG); } glReportError(); if (OpenGL.ColorAlphaUnit2) { bool enable_coloralpha_texture_unit_2 = OpenGL.ColorAlphaUnitColorEnabledState[1] || OpenGL.ColorAlphaUnitAlphaEnabledState[1]; if (enable_coloralpha_texture_unit_2) { glActiveTextureARB(OpenGL.ColorAlphaUnit2); glEnable(GL_TEXTURE_2D); if (InternalConfig.EXT_compiled_vertex_array) { glClientActiveTextureARB(OpenGL.ColorAlphaUnit2); glEnableClientState(GL_TEXTURE_COORD_ARRAY); glTexCoordPointer(4, GL_FLOAT, 0, &OGLRender.TTexture[0]); } } if (enable_fog_texture_unit || enable_coloralpha_texture_unit_2) { glActiveTextureARB(OpenGL.ColorAlphaUnit1); if (InternalConfig.EXT_compiled_vertex_array) { glClientActiveTextureARB(OpenGL.ColorAlphaUnit1); } } if (!OpenGL.ColorAlphaUnitColorEnabledState[0] && !OpenGL.ColorAlphaUnitAlphaEnabledState[0]) { if (InternalConfig.EXT_compiled_vertex_array) { glDisableClientState(GL_TEXTURE_COORD_ARRAY); glTexCoordPointer( 4, GL_FLOAT, 0, NULL ); } glDisable(GL_TEXTURE_2D); } // Restore the previous texture environment glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_COMBINE_EXT); } else { if (enable_fog_texture_unit) { glActiveTextureARB(OpenGL.ColorAlphaUnit1); if (InternalConfig.EXT_compiled_vertex_array) { glClientActiveTextureARB(OpenGL.ColorAlphaUnit1); } } if (OpenGL.Texture == false) { if (InternalConfig.EXT_compiled_vertex_array) { glDisableClientState(GL_TEXTURE_COORD_ARRAY); glTexCoordPointer( 4, GL_FLOAT, 0, NULL ); } // GL_RECTANGLE_ARB overrides GL_TEXTURE_2D and this // has already been skipped in setState if (!m_useRectangleARB) 
glDisable(GL_TEXTURE_2D); } // Restore the previous texture environment SetColorCombineState(); } // GL_RECTANGLE_ARB overrides GL_TEXTURE_2D if (m_useRectangleARB) { glDisable(GL_TEXTURE_RECTANGLE_ARB); glReportError(); } glReportError(); // This must be a forced update because GlideState changes of ChromaKeyMode // that don't change the corresponding GL-state are filtered out ForceChromaKeyAndAlphaStateUpdate(); } glReportError(); VERIFY_ACTIVE_TEXTURE_UNIT(OpenGL.ColorAlphaUnit1); VERIFY_TEXTURE_ENABLED_STATE(); } bool Framebuffer::draw(const tilesize* tilesizetable, bool pixelpipeline) { #ifdef OGL_FRAMEBUFFER GlideMsg( "Framebuffer::draw(---, %d)\n", pixelpipeline); #endif glReportErrors("Framebuffer::draw()"); bool init_second_textureunit = pixelpipeline && OpenGL.ColorAlphaUnit2; FxU32* texbuffer = reinterpret_cast(m_texbuffer->Address); // Render the tiles GLint n = 0; GLint x; GLint y = 0; GLint y_step; for(int w = 0; y < m_height && w < MaxTiles; w++, y += y_step) { y_step = tilesizetable[w].y; x = 0; GLint x_step; for(int v = 0; x < m_width && v < MaxTiles; v++, x += x_step) { x_step = tilesizetable[w].x[v]; // Use unique (but always the same) name for each texture in order // to maintain the size and avoid vram memory reallocation GLint texturename = m_textureNames[n]; const TileUpdateState updateState = createTextureData(texbuffer, x, y, x_step, y_step, n); if (updateState != TileUpdateState_TileEmpty) { #ifdef DEBUG_TILE_RENDERING unsigned int color; if (updateState == TileUpdateState_TileDownloadToGPU) // edges of downloaded tiles are red color = 0xff0000ff; else // edges of rendered tiles are cyan color = 0x00ffffff; ((long*) texbuffer)[0] = ((long*) texbuffer)[1] = ((long*) texbuffer)[x_step] = ((long*) texbuffer)[x_step -1] = ((long*) texbuffer)[x_step -2] = ((long*) texbuffer)[2 * x_step -1] = ((long*) texbuffer)[(x_step - 1) * y_step] = ((long*) texbuffer)[(x_step - 1) * y_step + 1] = ((long*) texbuffer)[(x_step - 2) * y_step] = ((long*) 
texbuffer)[x_step * y_step - 1] = ((long*) texbuffer)[x_step * y_step - 2] = ((long*) texbuffer)[x_step * (y_step - 1) - 1] = color; #endif glBindTexture(GL_TEXTURE_2D, texturename); if (init_second_textureunit) { glActiveTextureARB(OpenGL.ColorAlphaUnit2); glBindTexture(GL_TEXTURE_2D, texturename); glActiveTextureARB(OpenGL.ColorAlphaUnit1); } #ifndef DEBUG_TILE_RENDERING if (updateState == TileUpdateState_TileDownloadToGPU || InternalConfig.APPLE_client_storage == false) { glTexImage2D(GL_TEXTURE_2D, 0, m_glInternalFormat, x_step, y_step, 0, m_glFormat, m_glType, texbuffer); glReportError(); } #endif static struct { const GLfloat bl[4]; const GLfloat br[4]; const GLfloat tr[4]; const GLfloat tl[4]; } texcoords = { {0.0, 0.0, 1.0, 1.0}, {1.0, 0.0, 1.0, 1.0}, {1.0, 1.0, 1.0, 1.0}, {0.0, 1.0, 1.0, 1.0} }; glBegin(GL_QUADS); // counter clockwise glColor3f(1.0, 1.0, 1.0); glTexCoord4fv(&texcoords.bl[0]); if (init_second_textureunit) { glMultiTexCoord4fvARB(OpenGL.ColorAlphaUnit2, &texcoords.bl[0]); } glVertex3f(x, y, m_glDepth); glColor3f(1.0, 1.0, 1.0); glTexCoord4fv(&texcoords.br[0]); if (init_second_textureunit) { glMultiTexCoord4fvARB(OpenGL.ColorAlphaUnit2, &texcoords.br[0]); } glVertex3f(x + x_step, y, m_glDepth); glColor3f(1.0, 1.0, 1.0); glTexCoord4fv(&texcoords.tr[0]); if (init_second_textureunit) { glMultiTexCoord4fvARB(OpenGL.ColorAlphaUnit2, &texcoords.tr[0]); } glVertex3f(x + x_step, y + y_step, m_glDepth); glColor3f(1.0, 1.0, 1.0); glTexCoord4fv(&texcoords.tl[0]); if (init_second_textureunit) { glMultiTexCoord4fvARB(OpenGL.ColorAlphaUnit2, &texcoords.tl[0]); } glVertex3f(x, y + y_step , m_glDepth); glEnd(); glReportError(); // Advance to the next texbuffer location texbuffer += x_step * y_step; } n ++; } } s_Framebuffer.SetRenderBufferChanged(); return y == m_height && x == m_width; } bool Framebuffer::drawCompiledVertexArrays(const tilesize* tilesizetable, int vertexarrayindex, int tilecount, bool pixelpipeline) { #ifdef OGL_FRAMEBUFFER GlideMsg( 
"Framebuffer::draw(---, %d)\n", pixelpipeline); #endif glReportErrors("Framebuffer::drawCompiledVertexArrays()"); // Finish rendering RenderUnlockArrays(); // Transfer coords to VRAM glLockArraysEXT(vertexarrayindex * 3, tilecount * 6); OGLRender.BufferLocked = true; const bool init_second_textureunit = pixelpipeline && (OpenGL.ColorAlphaUnitColorEnabledState[1] || OpenGL.ColorAlphaUnitAlphaEnabledState[1]); FxU32* texbuffer = reinterpret_cast(m_texbuffer->Address); // Render the tiles GLint n = 0; GLint x; GLint y = 0; GLint y_step; const GLenum textureTarget = m_useRectangleARB ? GL_TEXTURE_RECTANGLE_ARB : GL_TEXTURE_2D; for(int w = 0; y < m_height && w < MaxTiles; w++, y += y_step) { y_step = tilesizetable[w].y; x = 0; GLint x_step; for(int v = 0; x < m_width && v < MaxTiles; v++, x += x_step) { x_step = tilesizetable[w].x[v]; const TileUpdateState updateState = createTextureData(texbuffer, x, y, x_step, y_step, n); if (updateState != TileUpdateState_TileEmpty) { #ifdef DEBUG_TILE_RENDERING unsigned int color; if (updateState == TileUpdateState_TileDownloadToGPU) // edges of downloaded tiles are red color = 0xff0000ff; else // edges of rendered tiles are cyan color = 0x00ffffff; ((long*) texbuffer)[0] = ((long*) texbuffer)[1] = ((long*) texbuffer)[x_step] = ((long*) texbuffer)[x_step -1] = ((long*) texbuffer)[x_step -2] = ((long*) texbuffer)[2 * x_step -1] = ((long*) texbuffer)[x_step * (y_step - 2)] = ((long*) texbuffer)[x_step * (y_step - 1)] = ((long*) texbuffer)[x_step * (y_step - 1) + 1] = ((long*) texbuffer)[x_step * y_step - 1] = ((long*) texbuffer)[x_step * y_step - 2] = ((long*) texbuffer)[x_step * (y_step - 1) - 1] = color; #endif // Use unique (but always the same) name for each // texture in order to be able to reuse tile data const GLint texturename = m_textureNames[n]; // The texture rectangle is better suited for video, // which is close to a framebuffer if (init_second_textureunit) { glActiveTextureARB(OpenGL.ColorAlphaUnit2); glReportError(); 
glBindTexture(textureTarget, texturename); glReportError(); glActiveTextureARB(OpenGL.ColorAlphaUnit1); glReportError(); } glBindTexture(textureTarget, texturename); glReportError(); #ifndef DEBUG_TILE_RENDERING if (updateState == TileUpdateState_TileDownloadToGPU || InternalConfig.APPLE_client_storage == false) #endif { glTexImage2D(textureTarget, 0, m_glInternalFormat, x_step, y_step, 0, m_glFormat, m_glType, texbuffer); glReportError(); } // Draw the tile glDrawArrays(GL_TRIANGLES, vertexarrayindex * 3 + n * 6, 6); // Advance to the next texbuffer location texbuffer += x_step * y_step; glReportError(); } n++; } } s_Framebuffer.SetRenderBufferChanged(); return y == m_height && x == m_width; } int Framebuffer::buildVertexArrays(const tilesize* tilesizetable, int vertexarrayindex) { // Compute coordinates for compiled vertex arrays TColorStruct* pC = &OGLRender.TColor[vertexarrayindex]; TVertexStruct* pV = &OGLRender.TVertex[vertexarrayindex]; TTextureStruct* pTS = &OGLRender.TTexture[vertexarrayindex]; int n = 0; GLint y = 0; GLint y_step; for(int w = 0; y < m_height && w < MaxTiles; w++, y += y_step) { y_step = tilesizetable[w].y; GLint x = 0; GLint x_step; for(int v = 0; x < m_width && v < MaxTiles; v++, x += x_step) { x_step = tilesizetable[w].x[v]; // Write coordinates counter clockwise into render buffers pC->ar = pC->ag = pC->ab = pC->br = pC->bg = pC->bb = pC->cr = pC->cg = pC->cb = pC->aa = pC->ba = pC->ca = 1.0f; pV->ax = x; pV->ay = y; pV->bx = x + x_step; pV->by = y; pV->cx = x + x_step; pV->cy = y + y_step; pV->az = pV->bz = pV->cz = m_glDepth; pTS->as = 0.0; pTS->at = 0.0; pTS->bs = m_useRectangleARB ? x_step : 1.0f; pTS->bt = 0.0; pTS->cs = m_useRectangleARB ? x_step : 1.0f; pTS->ct = m_useRectangleARB ? 
y_step : 1.0f; pTS->aq = pTS->bq = pTS->cq = 0.0f; pTS->aoow = pTS->boow = pTS->coow = 1.0f; pC++; pV++; pTS++; pC->ar = pC->ag = pC->ab = pC->br = pC->bg = pC->bb = pC->cr = pC->cg = pC->cb = pC->aa = pC->ba = pC->ca = 1.0f; pV->ax = x + x_step; pV->ay = y + y_step; pV->bx = x; pV->by = y + y_step; pV->cx = x; pV->cy = y; pV->az = pV->bz = pV->cz = m_glDepth; pTS->as = m_useRectangleARB ? x_step : 1.0f; pTS->at = m_useRectangleARB ? y_step : 1.0f; pTS->bs = 0.0; pTS->bt = m_useRectangleARB ? y_step : 1.0f; pTS->cs = 0.0; pTS->ct = 0.0; pTS->aq = pTS->bq = pTS->cq = 0.0f; pTS->aoow = pTS->boow = pTS->coow = 1.0f; pC++; pV++; pTS++; n++; } } return n; } #ifdef __ALTIVEC__ // altivec code inline Framebuffer::TileUpdateState Framebuffer::Convert565Kto8888_AV(FxU16* buffer1, register FxU32* buffer2, register FxU32 width, register FxU32 height, register FxU32 stride, int checksumIndex) { const vector bool short chromakey_565_av = m_ChromaKey.Vector; const int width_av = width >> 3; // 8 16-bit words const int stride_av = stride >> 3; // 8 16-bit words const int jump_av = width_av + stride_av; vector bool short* src_av = (vector bool short*) buffer1; // Setup channel 0 for reading one row of 565 ushorts from src into the L1 cache // This isn't read again soon, just written back once so we can bypass L2 cache const int src_control = (((width_av >> 4) & 0x1f) << 3) + (1 << 8) + (stride_av << 16); vec_dstt(src_av, src_control, 0); int h = height; // loop through the src to check whether anything has to be copied at all vector bool short* stop_zero_av = &src_av[width_av]; do { do { const vector bool short pixels_565_av = *src_av; if (!vec_all_eq(pixels_565_av, chromakey_565_av)) goto create_8888_texture_1_av; // Test clear first before jumping to create_8888_texture_1_av src_av++; } while (src_av != stop_zero_av); src_av += stride_av; // Update channel 0 to prefetch the next row into the L1 cache vec_dstt(src_av, src_control, 0); stop_zero_av += jump_av; } while (--h); 
return TileUpdateState_TileEmpty; create_8888_texture_1_av: // Delete dst up to the last chromakey entry in src stop_zero_av = src_av; src_av = (vector bool short*) buffer1; vector unsigned long* dst_av = (vector unsigned long*) buffer2; const vector unsigned long null_av = vec_splat_u32(0); // We're just writing to dst, no reading must occur, and starting a prefetch is a bad idea h = height; vector bool short* stop_av = &src_av[width_av]; do { do { if (src_av == stop_zero_av) goto create_8888_texture_2_av; // Test clear first // clear cacheline to prevent it from being read-in (32 bytes = 2 altivec writes) // we're just clearing the cache line since we're going to write zeros anyway __dcbz(dst_av, 0); dst_av += 2; src_av++; } while (src_av != stop_av); src_av += stride_av; stop_av += jump_av; } while (--h); return TileUpdateState_TileEmpty; create_8888_texture_2_av: // Build permute vector for storing high/lo 565 pixels into RgbxA // - results in R = r565+ggg, G = gggbbbbb, B = 0, A = glAlpha // -> good for comparison, green and blue are converted afterwards const vector unsigned char permute_hi_av = {0x00, 0x01, 0x12, 0x13, 0x02, 0x03, 0x16, 0x17, 0x04, 0x05, 0x1a, 0x1b, 0x06, 0x07, 0x1e, 0x1f}; // Computing the permute table just takes 2 instructions instead of 1 instruction + 4 memory reads const vector unsigned char permute_lo_av = vec_or(permute_hi_av, vec_splat_u8(8)); // const vector unsigned long alpha_8888_av = {m_glAlpha, m_glAlpha, m_glAlpha, m_glAlpha}; const vector unsigned long alpha_8888_av = m_glAlpha.Vector; // Build chromakey and alpha RgbxA vector const vector unsigned long chromakey_RgbxA_av = vec_perm((const vector unsigned long) chromakey_565_av, alpha_8888_av, permute_lo_av); // Constants const vector unsigned long const_3_av = vec_splat_u32(3); const vector unsigned long const_5_av = vec_splat_u32(5); // R5G6B500AA color masks const vector unsigned long mask_8888_ra = {0xf80000ff, 0xf80000ff, 0xf80000ff, 0xf80000ff}; const vector unsigned 
long mask_8888_g = {0x07e00000, 0x07e00000, 0x07e00000, 0x07e00000}; // Computing the mask just takes 2 instructions instead of 1 instruction + 4 memory reads const vector unsigned long mask_8888_b = vec_sr(mask_8888_ra, vec_splat_u32(11)); vector unsigned long pixels_8888_src_av; vector bool long mask; vector unsigned long p; vector unsigned long q; vector unsigned long pixels_8888_dst_av; // Checksum the tile vector unsigned long c = null_av; vector unsigned long d; // Continue the loop and convert pixels from 565 to 8888 vec_dstt(src_av, src_control, 0); do { do { const vector unsigned long pixels_565_av = (const vector unsigned long) (*src_av); // tile checksum part 1 d = vec_sr(c, const_5_av); c = vec_add(c, pixels_565_av); // restore chroma key for next update *src_av++ = chromakey_565_av; // hi-word pixels pixels_8888_src_av = vec_perm(pixels_565_av, alpha_8888_av, permute_hi_av); mask = vec_cmpeq(pixels_8888_src_av, chromakey_RgbxA_av); // Keep red and alpha component pixels_8888_dst_av = vec_and(pixels_8888_src_av, mask_8888_ra); // add green component p = vec_and(pixels_8888_src_av, mask_8888_g); q = vec_sr(p, const_3_av); pixels_8888_dst_av = vec_or(pixels_8888_dst_av, q); // add blue component p = vec_and(pixels_8888_src_av, mask_8888_b); q = vec_sr(p, const_5_av); pixels_8888_dst_av = vec_or(pixels_8888_dst_av, q); // We're just writing to dst and thus can clear the cacheline in order // to avoid the read-in from system memory (32 bytes = 2 altivec writes) // Note: This is a G4 hack, but on a G5 the code will be fast enough anyway __dcbz(dst_av, 0); // Select between pixels and chromakey *dst_av++ = vec_sel(pixels_8888_dst_av, null_av, mask); // tile checksum part 2 c = vec_xor(c, d); // lo-word pixels pixels_8888_src_av = vec_perm(pixels_565_av, alpha_8888_av, permute_lo_av); mask = vec_cmpeq(pixels_8888_src_av, chromakey_RgbxA_av); // Keep red and alpha component pixels_8888_dst_av = vec_and(pixels_8888_src_av, mask_8888_ra); // add green component p 
= vec_and(pixels_8888_src_av, mask_8888_g); q = vec_sr(p, const_3_av); pixels_8888_dst_av = vec_or(pixels_8888_dst_av, q); // add blue component p = vec_and(pixels_8888_src_av, mask_8888_b); q = vec_sr(p, const_5_av); pixels_8888_dst_av = vec_or(pixels_8888_dst_av, q); // Select between pixels and chromakey *dst_av++ = vec_sel(pixels_8888_dst_av, null_av, mask); } while (src_av != stop_av); src_av += stride_av; vec_dstt(src_av, src_control, 0); stop_av += jump_av; } while (--h); // Skip downloading tile data to the gpu if the content hasn't changed if (vec_all_eq(c, m_tileChecksums[checksumIndex])) return TileUpdateState_TileDrawOnly; // The tile has been converted, been changed and must be downloaded to the gpu m_tileChecksums[checksumIndex] = c; return TileUpdateState_TileDownloadToGPU; } #endif // Non-Altivec-code inline Framebuffer::TileUpdateState Framebuffer::Convert565Kto8888(FxU16* buffer1, register FxU32* buffer2, register FxU32 width, register FxU32 height, register FxU32 stride) { // Process two pixels at once const register unsigned long chromakey1 = m_ChromaKey.Scalar << 16; const register unsigned long chromakey2 = m_ChromaKey.Scalar; const register unsigned long chromakey12 = chromakey1 | chromakey2; width = width >> 1; stride = stride >> 1; register unsigned long pixel; register unsigned long* stop; register unsigned long jump = width + stride; register unsigned long* src = reinterpret_cast(buffer1); // check if tile must be processed in advance // to avoid useless writes to main memory // The tile should at least fit into the second level cache // so reading it again wouldn't hurt as much as doing needless writes register unsigned long h = height; stop = &src[width]; do { do { pixel = *src++; if (pixel != chromakey12) goto create_8888_texture; } while (src != stop); src += stride; stop += jump; } while (--h); return TileUpdateState_TileEmpty; create_8888_texture: const register unsigned long alpha = m_glAlpha.Scalar; const register unsigned long 
null = 0x00000000; const register unsigned long mask_pixel1 = 0xffff0000; const register unsigned long mask_pixel2 = 0x0000ffff; const register unsigned long mask_pixel1_r = 0xf8000000; const register unsigned long mask_pixel1_g = 0x07e00000; const register unsigned long mask_pixel1_b = 0x001f0000; const register unsigned long mask_pixel2_r = 0x0000f800; const register unsigned long mask_pixel2_g = 0x000007e0; const register unsigned long mask_pixel2_b = 0x0000001f; src = reinterpret_cast(buffer1); stop = &src[width]; do { do { // GL_RGBA pixel = *src; if (pixel == chromakey12) { *buffer2++ = null; *buffer2++ = null; } else { *src = chromakey12; if ( (pixel & mask_pixel1) == chromakey1) { *buffer2++ = null; } else { *buffer2++ = ( alpha | // A ( pixel & mask_pixel1_b ) >> 5 | // B ( pixel & mask_pixel1_g ) >> 3 | // G ( pixel & mask_pixel1_r )); // R } if ( (pixel & mask_pixel2) == chromakey2) { *buffer2++ = null; } else { *buffer2++ = ( alpha | // A ( pixel & mask_pixel2_b ) << 11 | // B ( pixel & mask_pixel2_g ) << 13 | // G ( pixel & mask_pixel2_r ) << 16); // R } } src++; } while (src != stop); src += stride; stop += jump; } while (--height); return TileUpdateState_TileDownloadToGPU; } inline Framebuffer::TileUpdateState Framebuffer::Convert1555Kto8888(FxU16* buffer1, register FxU32* buffer2, register FxU32 width, register FxU32 height, register FxU32 stride) { // Process two pixels at once register unsigned long pixel; register unsigned long x; register unsigned long* src = reinterpret_cast(buffer1); const unsigned long null = 0x00000000; register unsigned long dstpixel = null; const register unsigned long chromakey1 = m_ChromaKey.Scalar << 16; const register unsigned long chromakey2 = m_ChromaKey.Scalar; const register unsigned long chromakey12 = chromakey1 | chromakey2; const register unsigned long alpha = m_glAlpha.Scalar; const register unsigned long mask_pixel1 = 0xffff0000; const register unsigned long mask_pixel2 = 0x0000ffff; const register unsigned 
long mask_pixel1_r = 0x7c000000; const register unsigned long mask_pixel1_g = 0x03e00000; const register unsigned long mask_pixel1_b = 0x001f0000; const register unsigned long mask_pixel2_r = 0x00007c00; const register unsigned long mask_pixel2_g = 0x000003e0; const register unsigned long mask_pixel2_b = 0x0000001f; width >>= 1; stride >>= 1; do { x = width; do { // GL_RGBA pixel = *src; if (pixel == chromakey12) { *buffer2++ = null; *buffer2++ = null; } else { *src = chromakey12; if ( (pixel & mask_pixel1) == chromakey1) { *buffer2++ = null; } else { dstpixel = ( alpha | // A ( pixel & mask_pixel1_b ) >> 5 | // B ( pixel & mask_pixel1_g ) >> 2 | // G ( pixel & mask_pixel1_r ) << 1); // R *buffer2++ = dstpixel; } if ( (pixel & mask_pixel2) == chromakey2) { *buffer2++ = null; } else { dstpixel = ( alpha | // A ( pixel & mask_pixel2_b ) << 11 | // B ( pixel & mask_pixel2_g ) << 14 | // G ( pixel & mask_pixel2_r ) << 17); // R *buffer2++ = dstpixel; } } src++; } while (--x); src += stride; } while (--height); return dstpixel != null ? 
// Converts one tile of a 32-bit ARGB framebuffer into 32-bit RGBA texture
// data, treating the chroma key as "nothing written here".
//
//   buffer1  first framebuffer pixel of the tile (read, and reset to the
//            chroma key wherever a pixel was consumed)
//   buffer2  destination texture data, written densely (width * height texels)
//   width    tile width in pixels
//   height   tile height in rows
//   stride   number of framebuffer pixels to skip from the end of one tile
//            row to the start of the next
//
// The 32-bit chroma key is the 16-bit key replicated into both half-words,
// which is the bit pattern a 16-bit fill of the buffer produces.
//
// Returns TileUpdateState_TileEmpty without touching buffer2 when the tile
// contains only chroma key pixels, TileUpdateState_TileDownloadToGPU otherwise.
inline Framebuffer::TileUpdateState Framebuffer::ConvertARGB8888Kto8888(FxU32* buffer1, FxU32* buffer2, FxU32 width, FxU32 height, FxU32 stride)
{
	// Bitwise OR: a logical OR would collapse the key to 0 or 1 and no
	// cleared pixel would ever be recognised as chroma key.
	const FxU32 key = static_cast<FxU32>(m_ChromaKey.Scalar);
	const FxU32 chromakey = key | (key << 16);
	const FxU32 pitch = width + stride;
	// Nothing to scan: report an empty tile instead of running off the buffer
	if (width == 0 || height == 0)
	{
		return TileUpdateState_TileEmpty;
	}

	// Pass 1: check whether the tile has to be processed at all in order
	// to avoid useless writes to main memory. The tile should at least fit
	// into the second level cache, so reading it twice hurts less than
	// doing needless writes.
	bool empty = true;
	for (FxU32 row = 0; empty && row < height; row++)
	{
		const FxU32* line = buffer1 + row * pitch;
		for (FxU32 col = 0; col < width; col++)
		{
			if (line[col] != chromakey)
			{
				empty = false;
				break;
			}
		}
	}
	if (empty)
	{
		return TileUpdateState_TileEmpty;
	}

	// Pass 2: convert to GL_RGBA and restore the chroma key in the source
	const FxU32 alpha = m_glAlpha.Scalar;
	for (FxU32 row = 0; row < height; row++)
	{
		FxU32* line = buffer1 + row * pitch;
		for (FxU32 col = 0; col < width; col++)
		{
			const FxU32 pixel = line[col];
			if (pixel == chromakey)
			{
				*buffer2++ = 0;
			}
			else
			{
				line[col] = chromakey;
				// Drop the source alpha byte and append the constant alpha
				*buffer2++ = (pixel << 8) | alpha;
			}
		}
	}
	return TileUpdateState_TileDownloadToGPU;
}
(%vlx)->(%vlx)\n", checksumIndex, x, y, x_step, y_step, (state ==TileUpdateState_TileDownloadToGPU) ? "DownLoadToGPU" : ((state == TileUpdateState_TileDrawOnly) ? "DrawOnly" : "TileEmpty"), c, m_tileChecksums[checksumIndex]); #endif return state; } else #endif return Convert565Kto8888(&m_framebuffer->Address[index], texbuffer, x_step, y_step, stride); } else if (m_framebuffer->WriteMode == GR_LFBWRITEMODE_1555) { return Convert1555Kto8888(&m_framebuffer->Address[index], texbuffer, x_step, y_step, stride); } else if (m_framebuffer->WriteMode == GR_LFBWRITEMODE_888) { FxU32* framebuffer = &reinterpret_cast(m_framebuffer->Address)[index]; return ConvertARGB8888Kto8888(framebuffer, texbuffer, x_step, y_step, stride); } else { return TileUpdateState_TileEmpty; } } \ No newline at end of file +//************************************************************** //* OpenGLide - Glide to OpenGL Wrapper //* http://openglide.sourceforge.net //* //* framebuffer emulation //* //* OpenGLide is OpenSource under LGPL license //* Mac version and additional features by Jens-Olaf Hemprich //************************************************************** #include "Framebuffer.h" #include "Glide.h" #include "GlideApplication.h" #include "GlideSettings.h" #include "GLRender.h" #include "GLRenderUpdateState.h" #include "GLColorAlphaCombineEnvTables.h" // check if tile needs to be displayed #define CHECK_RENDER_TILE // Display small dots at opposite corners of rendered framebuffer tiles //#define DEBUG_TILE_RENDERING Framebuffer::Framebuffer() : m_x_step_start(0) , m_y_step_start(0) , m_x_step_start_opaque(0) , m_y_step_start_opaque(0) , m_width(0) , m_height(0) , m_framebuffer(NULL) , m_texbuffer(NULL) , m_origin(GR_ORIGIN_UPPER_LEFT) , m_glInternalFormat(-1) , m_glFormat(-1) , m_glType(-1) , m_glDepth(1.0f) , m_format_valid(false) , m_useRectangleARB(false) , m_must_clear_buffer(true) , m_custom_tilesizes(NULL) { } Framebuffer::~Framebuffer() { } inline int Framebuffer::getTileCount() const 
{ return InternalConfig.EXT_compiled_vertex_array ? m_tilesizesCount + m_customtilesizesCount : s_maxTiles; } bool Framebuffer::initialise_buffers(BufferStruct* framebuffer, BufferStruct* texbuffer, FxU32 width, FxU32 height, const tilesize* tilesizetable) { #ifdef OGL_FRAMEBUFFER GlideMsg( "GlideFrameBuffer::initialise_buffers(---, ---, %d, %d, ---)\n", width, height); #endif m_custom_tilesizes = tilesizetable; return initialise_buffers(framebuffer, texbuffer, width, height, 0, 0); } bool Framebuffer::initialise_buffers(BufferStruct* framebuffer, BufferStruct* texbuffer, FxU32 width, FxU32 height, FxU32 x_tile, FxU32 y_tile) { #ifdef OGL_FRAMEBUFFER GlideMsg( "Framebuffer::initialise_buffers(---, ---, %d, %d, %d, %d)\n", width, height, x_tile, y_tile); #endif m_framebuffer = framebuffer; m_texbuffer = texbuffer; m_framebuffer->WriteMode = m_texbuffer->WriteMode = GR_LFBWRITEMODE_UNUSED; m_width = width; m_height = height; // find out largest texture size GLint tile_size; glGetIntegerv(GL_MAX_TEXTURE_SIZE, &tile_size); m_x_step_start_opaque = tile_size; m_y_step_start_opaque = tile_size; m_x_step_start = min(tile_size, x_tile); m_y_step_start = min(tile_size, y_tile); m_x_step_start = max(16, m_x_step_start); m_y_step_start = max(16, m_y_step_start); m_useRectangleARB = InternalConfig.ARB_texture_rectangle && InternalConfig.EXT_compiled_vertex_array; // // The texture priority is set to minimun because // // frame buffer textures are never used a second time // // @todo: This is not true anymore in all cases // // because of the altivec checksum feature // const GLfloat zero = 0.0f; // // @todo: Only allocate needed texture names (m_tilesizesCount + m_customtilesizesCount) // glGenTextures(m_tileCount, &m_textureNames[0]); // glPrioritizeTextures(m_tileCount, &m_textureNames[0], &zero); // for(int i = 0; i < m_tileCount; i++) // { // const GLenum textureTarget = m_useRectangleARB ? 
GL_TEXTURE_RECTANGLE_ARB : GL_TEXTURE_2D; // glBindTexture(textureTarget, m_textureNames[i]); // if (m_useRectangleARB) // { // // This will not probably not work right now, because // // we're using RGBA (the OS9 prefered texture format) // // @todo: use ABGR??? in order to avoid byte swizzling // // glTextureRangeAPPLE(GLenum target, GLsizei length, GLvoid *pointer); // glTexParameteri(textureTarget, // GL_TEXTURE_STORAGE_HINT_APPLE, // GL_STORAGE_CACHED_APPLE); // glReportError(); // } // glTexParameteri(textureTarget, GL_TEXTURE_MIN_FILTER, GL_NEAREST); // glTexParameteri(textureTarget, GL_TEXTURE_MAG_FILTER, GL_NEAREST); // glTexParameteri(textureTarget, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); // glTexParameteri(textureTarget, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); // } // If a game has its own tilesize table, use // the largest tiles for opaque renderings GLint y_step = y_tile == 0 ? m_y_step_start_opaque : m_y_step_start; // init default/opaque tilesize table int w = 0; for(FxU32 y = 0; y < m_height && w < MaxTiles ; y += y_step, w++) { while (m_height - y < y_step) { y_step = y_step >> 1; } m_tilesizes[w].y = y_step; GLint x_step = x_tile == 0 ? 
m_x_step_start_opaque : m_x_step_start; int v = 0; for(FxU32 x = 0; x < m_width && v < MaxTiles; x += x_step, v++ ) { while (m_width - x < x_step) { x_step = x_step >> 1; } m_tilesizes[w].x[v] = x_step; } } if (InternalConfig.EXT_compiled_vertex_array) { // Generate tile coordinate arrays m_tilesizesVertexArrayIndex = OGLRender.FrameBufferStartIndex; m_tilesizesCount = buildVertexArrays(&m_tilesizes[0], m_tilesizesVertexArrayIndex); if (m_custom_tilesizes) { m_customtilesizesVertexArrayIndex = m_tilesizesVertexArrayIndex + m_tilesizesCount * 2; m_customtilesizesCount = buildVertexArrays(m_custom_tilesizes, m_customtilesizesVertexArrayIndex); } else { m_customtilesizesCount = 0; } } // The texture priority is set to maximum // because of the altivec checksum feature const GLfloat priority = 1.0f; const int tileCount = getTileCount(); glGenTextures(tileCount, &m_textureNames[0]); glPrioritizeTextures(tileCount, &m_textureNames[0], &priority); for(int i = 0; i < tileCount; i++) { const GLenum textureTarget = m_useRectangleARB ? GL_TEXTURE_RECTANGLE_ARB : GL_TEXTURE_2D; glBindTexture(textureTarget, m_textureNames[i]); if (m_useRectangleARB) { // This will not probably not work right now, because // we're using RGBA (the OS9 prefered texture format) // @todo: use ABGR??? 
in order to avoid byte swizzling // glTextureRangeAPPLE(GLenum target, GLsizei length, GLvoid *pointer); glTexParameteri(textureTarget, GL_TEXTURE_STORAGE_HINT_APPLE, GL_STORAGE_CACHED_APPLE); glReportError(); } glTexParameteri(textureTarget, GL_TEXTURE_MIN_FILTER, GL_NEAREST); glTexParameteri(textureTarget, GL_TEXTURE_MAG_FILTER, GL_NEAREST); glTexParameteri(textureTarget, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); glTexParameteri(textureTarget, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); } // Don't set the checksum to 0, as this would cause white screen in Carmageddon // because fully black tiles also have a 0 checksum and no texture data would be // downloaded at all (and the tile would be rendered as if TEXTURE_2D was disabled) memset(m_tileChecksums, 0xff, sizeof(vector unsigned long) * tileCount); return m_width > 0 && m_height > 0 && m_x_step_start > 0 && m_y_step_start > 0 && m_format_valid; } void Framebuffer::free_buffers() { #ifdef OGL_FRAMEBUFFER GlideMsg( "Framebuffer::free_buffers()\n"); #endif if (m_tilesizes) FreeObject(m_tilesizes); glDeleteTextures(getTileCount(), &m_textureNames[0]); } void Framebuffer::initialise_format(GrLfbWriteMode_t writemode) { #if defined(OGL_PART_DONE) || defined(OGL_FRAMEBUFFER) GlideMsg("Framebuffer::initialise_format(0x%x)\n", writemode); #endif // Enlarge buffer? if (writemode >= GR_LFBWRITEMODE_888 && (m_framebuffer->WriteMode < GR_LFBWRITEMODE_888 || m_framebuffer->WriteMode == GR_LFBWRITEMODE_UNUSED) && m_framebuffer->Address) { // Delete existing buffer FreeFrameBuffer(m_framebuffer->Address); m_framebuffer->Address = NULL; m_texbuffer->Address = NULL; } // Allocate 32-bit buffer (16bit buffer has been allocated in grSstWinOpen() if (m_framebuffer->Address == NULL) { const unsigned long openglpixels = OpenGL.WindowWidth * OpenGL.WindowHeight; // Framebuffer can be written to with 16bit or 32bit data const unsigned long buffertypesize = (writemode >= GR_LFBWRITEMODE_888) ? 
sizeof(FxU32) : sizeof(FxU16); Glide.FrameBuffer.Address = (FxU16*) AllocFrameBuffer(Glide.WindowTotalPixels * buffertypesize + openglpixels * sizeof(FxU32), 1); Glide.TempBuffer.Address = &Glide.FrameBuffer.Address[Glide.WindowTotalPixels * buffertypesize >> 1]; memset(Glide.FrameBuffer.Address, 0, Glide.WindowTotalPixels * buffertypesize); memset(Glide.TempBuffer.Address, 0, openglpixels * sizeof(FxU32)); } m_framebuffer->WriteMode = writemode; m_glInternalFormat = 4; m_glFormat = GL_RGBA; m_glType = GL_UNSIGNED_BYTE; FxU16 chromakeyvalue; switch (writemode) { case GR_LFBWRITEMODE_565: chromakeyvalue = s_GlideApplication.GetType() == GlideApplication::Carmageddon ? 0x1f1f : 0x07ff; m_format_valid = true; break; case GR_LFBWRITEMODE_1555: chromakeyvalue = 0x03ff; m_format_valid = true; break; case GR_LFBWRITEMODE_888: chromakeyvalue = 0x7ffdfeff; m_format_valid = true; break; default: chromakeyvalue = 0x0; m_format_valid = false; break; } // When the chromakeyvalue changes, the buffer has to be cleared if (chromakeyvalue != m_ChromaKey.Scalar) { SetChromaKeyValue(chromakeyvalue); m_must_clear_buffer = true; } } bool Framebuffer::begin_write() { #ifdef OGL_FRAMEBUFFER GlideMsg("Framebuffer::begin_write()\n"); #endif if (m_must_clear_buffer) { Clear(); m_must_clear_buffer = false; } return true; } void Framebuffer::Clear() { #ifdef OGL_FRAMEBUFFER GlideMsg( "Framebuffer::Clear()\n"); #endif const FxU16 chromakey = GetChromaKeyValue(); const FxU32 count = m_width * m_height ; FxU16* framebuffer = m_framebuffer->Address; for ( int i = 0; i < count; i++) { framebuffer[i] = chromakey; } } bool Framebuffer::end_write(FxU32 alpha, GLfloat depth, bool pixelpipeline) { #ifdef OGL_FRAMEBUFFER GlideMsg("Framebuffer::end_write(%d, %f, %d)\n", alpha, depth, pixelpipeline); #endif m_glDepth = depth; #ifdef __ALTIVEC__ for(int i = 0; i < 4; i++) { (&m_glAlpha.Scalar)[i] = alpha; } #else m_glAlpha.Scalar = alpha; #endif // if all pixels are invisible, nothing must be rendered. 
// The pixel conversion functions assume alpha is != 0 in order // to determine if a tile contains any pixels to be rendered. if (m_glAlpha.Scalar == 0) return false; set_gl_state(pixelpipeline); if (InternalConfig.EXT_compiled_vertex_array) { if (m_custom_tilesizes) { drawCompiledVertexArrays(m_custom_tilesizes, m_customtilesizesVertexArrayIndex, m_customtilesizesCount, pixelpipeline); } else { drawCompiledVertexArrays(m_tilesizes, m_tilesizesVertexArrayIndex, m_tilesizesCount, pixelpipeline); } } else { const tilesize* tilesizes = m_custom_tilesizes ? m_custom_tilesizes : m_tilesizes; draw(tilesizes, pixelpipeline); } restore_gl_state(pixelpipeline); return true; } bool Framebuffer::end_write(FxU32 alpha) { #ifdef OGL_DONE GlideMsg("Framebuffer::end_write(%d)\n", alpha); #endif // draw frame buffer // @todo: Depth should OpenGL.ZNear, but that breaks overlays in Myth FxBool result = end_write(alpha, 0.0, false); return result; } bool Framebuffer::end_write() { #ifdef OGL_FRAMEBUFFER GlideMsg( "Framebuffer::end_write( )\n" ); #endif return end_write(0x000000ff); } bool Framebuffer::end_write_opaque() { #ifdef OGL_FRAMEBUFFER GlideMsg("Framebuffer::end_write_opaque()\n"); #endif // @todo: Depth should OpenGL.ZNear, but that breaks overlays in Myth return end_write(0x000000ff, 0.0, false); } void Framebuffer::set_gl_state(bool pixelpipeline) { #ifdef OGL_FRAMEBUFFER GlideMsg( "Framebuffer::set_gl_state(%d)\n", pixelpipeline); #endif glReportErrors("Framebuffer::set_gl_state"); VERIFY_ACTIVE_TEXTURE_UNIT(OpenGL.ColorAlphaUnit1); // Disable the cull mode glDisable(GL_CULL_FACE); // Disable clip volume hint manually to avoid recursion if (InternalConfig.EXT_clip_volume_hint && OpenGL.ClipVerticesEnabledState) { glHint(GL_CLIP_VOLUME_CLIPPING_HINT_EXT, GL_FASTEST); } if (pixelpipeline) { if (OpenGL.ColorAlphaUnit2) { // Pixelpipeline support for env cobine based rendering: // Framebuffer pixels must be routed through the coloralpha unit // as if they were produced by 
the vertex iterators without an // additional GL texture unit -> source must be changed accordingly m_bRestoreColorCombine = false; if (Glide.State.ColorCombineLocal == GR_COMBINE_LOCAL_ITERATED) { Glide.State.ColorCombineLocal = GR_COMBINE_LOCAL_PIXELPIPELINE; m_bRestoreColorCombine = true; } if (Glide.State.ColorCombineOther == GR_COMBINE_OTHER_ITERATED) { Glide.State.ColorCombineOther = GR_COMBINE_OTHER_PIXELPIPELINE; m_bRestoreColorCombine = true; } if (m_bRestoreColorCombine) SetColorCombineState(); m_bRestoreAlphaCombine = false; if (Glide.State.AlphaLocal == GR_COMBINE_LOCAL_ITERATED) { Glide.State.AlphaLocal = GR_COMBINE_LOCAL_PIXELPIPELINE; m_bRestoreAlphaCombine = true; } if (Glide.State.AlphaOther == GR_COMBINE_OTHER_ITERATED) { Glide.State.AlphaOther = GR_COMBINE_OTHER_PIXELPIPELINE; m_bRestoreAlphaCombine = true; } if (m_bRestoreAlphaCombine) SetAlphaCombineState(); // Update the opengl state for the pixel pipeline RenderUpdateState(); // If the write mode doesn't provide alpha then m_glAlpha is used // as the constant alpha value, and we can use the alpha test // to mask out chromakey pixels switch (m_framebuffer->WriteMode) { case GR_LFBWRITEMODE_565: case GR_LFBWRITEMODE_888: glEnable(GL_ALPHA_TEST); const GLenum alphaTestFunction = GL_EQUAL; const GLfloat alphaTestReferenceValue= m_glAlpha.Scalar * D1OVER255; OpenGL.AlphaTestFunction = alphaTestFunction; OpenGL.AlphaReferenceValue = alphaTestReferenceValue; glAlphaFunc(alphaTestFunction, alphaTestReferenceValue); glReportError(); break; } if (m_useRectangleARB) { // The client texture state is already setup correctly since we just // have to enable the texture rectangle state according to the texture_2D state // (the texture_2d state is not changed when the pixelpipeline mode is active) const GLenum textureTarget = GL_TEXTURE_RECTANGLE_ARB; const bool enableColoralphaTextureUnit1 = OpenGL.ColorAlphaUnitColorEnabledState[0] || OpenGL.ColorAlphaUnitAlphaEnabledState[0]; if 
(enableColoralphaTextureUnit1) { // GL_RECTANGLE_ARB overrides GL_TEXTURE_2D glEnable(textureTarget); } if (OpenGL.ColorAlphaUnit2) { const bool enableColoralphaTextureUnit2 = OpenGL.ColorAlphaUnitColorEnabledState[1] || OpenGL.ColorAlphaUnitAlphaEnabledState[1]; if (enableColoralphaTextureUnit2) { glActiveTextureARB(OpenGL.ColorAlphaUnit2); glEnable(textureTarget); glActiveTextureARB(OpenGL.ColorAlphaUnit1); } } glReportError(); } } else // simple render mode { if (InternalConfig.EXT_secondary_color) { glDisable(GL_COLOR_SUM_EXT); if (InternalConfig.EXT_secondary_color && OpenGL.ColorAlphaUnit2 == NULL) { glDisableClientState(GL_SECONDARY_COLOR_ARRAY_EXT); glSecondaryColorPointerEXT(3, GL_FLOAT, 0, NULL); } glReportError(); } // @todo: should be here but causes overlay not to be rendered // but commenting out helps neither - framebuffer tiles show // 3D view data and are never updated again - looks weird // -> pixelpipeline in simple render mode broken - sorry folks // if (OpenGL.Texture == false) { // GL_RECTANGLE_ARB overrides GL_RECTANGLE_2D if (!m_useRectangleARB) glEnable(GL_TEXTURE_2D); if (InternalConfig.EXT_compiled_vertex_array) { glEnableClientState(GL_TEXTURE_COORD_ARRAY); glTexCoordPointer(4, GL_FLOAT, 0, &OGLRender.TTexture[0]); } } if (m_useRectangleARB) { glEnable(GL_TEXTURE_RECTANGLE_ARB); } } // Set the origin with clipping glMatrixMode(GL_PROJECTION); glLoadIdentity(); if (m_origin == GR_ORIGIN_LOWER_LEFT) { glOrtho(Glide.State.ClipMinX, Glide.State.ClipMaxX, Glide.State.ClipMinY, Glide.State.ClipMaxY, OpenGL.ZNear, OpenGL.ZFar); glViewport(OpenGL.OriginX + OpenGL.ClipMinX, OpenGL.OriginY + OpenGL.ClipMinY, OpenGL.ClipMaxX - OpenGL.ClipMinX, OpenGL.ClipMaxY - OpenGL.ClipMinY); } else { glOrtho(Glide.State.ClipMinX, Glide.State.ClipMaxX, Glide.State.ClipMaxY, Glide.State.ClipMinY, OpenGL.ZNear, OpenGL.ZFar); glViewport(OpenGL.OriginX + OpenGL.ClipMinX, OpenGL.OriginY + OpenGL.WindowHeight - OpenGL.ClipMaxY, OpenGL.ClipMaxX - OpenGL.ClipMinX, 
OpenGL.ClipMaxY - OpenGL.ClipMinY); } // The scissor rectangle is not reset, because scissor mode // is only enabled when clearing the buffer glMatrixMode(GL_MODELVIEW); glReportError(); } else { // disable blend glDisable(GL_BLEND); // disable depth buffer glDepthMask(false); // Enable colormask glColorMask( true, true, true, false); // Needed for displaying in-game menus if (Glide.State.DepthBufferMode != GR_DEPTHBUFFER_DISABLE) { glDisable(GL_DEPTH_TEST); } glEnable(GL_ALPHA_TEST); // Update state as we're calling update triggers on restore const GLenum alphaTestFunction = GL_GREATER; const GLfloat alphaTestReferenceValue= 0.0; OpenGL.AlphaTestFunction = alphaTestFunction; OpenGL.AlphaReferenceValue = alphaTestReferenceValue; glAlphaFunc(alphaTestFunction, alphaTestReferenceValue); glReportError(); // Reset the clipping window // and set the origin glMatrixMode(GL_PROJECTION); glLoadIdentity(); if (m_origin == GR_ORIGIN_LOWER_LEFT) { glOrtho(0, Glide.WindowWidth, 0, Glide.WindowHeight, OpenGL.ZNear, OpenGL.ZFar); glViewport(OpenGL.OriginX, OpenGL.OriginY, OpenGL.WindowWidth, OpenGL.WindowHeight); } else { glOrtho(0, Glide.WindowWidth, Glide.WindowHeight, 0, OpenGL.ZNear, OpenGL.ZFar); glViewport(OpenGL.OriginX, OpenGL.OriginY, OpenGL.WindowWidth, OpenGL.WindowHeight); } // The scissor rectangle is not changed, because scissor mode // is only enabled when clearing the buffer glMatrixMode(GL_MODELVIEW); glReportError(); // enable framebuffer texture unit if (OpenGL.ColorAlphaUnit2) { const bool disable_coloralpha_texture_unit_2 = OpenGL.ColorAlphaUnitColorEnabledState[1] || OpenGL.ColorAlphaUnitAlphaEnabledState[1]; if (disable_coloralpha_texture_unit_2) { glActiveTextureARB(OpenGL.ColorAlphaUnit2); if (InternalConfig.EXT_compiled_vertex_array) { glClientActiveTextureARB(OpenGL.ColorAlphaUnit2); glDisableClientState(GL_TEXTURE_COORD_ARRAY); // On MacOS9 (Classic?) 
the texcoord pointer needs to be reset // to the default value when glLockArrays/glUnlockArrays is used glTexCoordPointer(4, GL_FLOAT, 0, NULL); glClientActiveTextureARB(OpenGL.ColorAlphaUnit1); } glDisable(GL_TEXTURE_2D); glActiveTextureARB(OpenGL.ColorAlphaUnit1); } if (!(OpenGL.ColorAlphaUnitColorEnabledState[0] || OpenGL.ColorAlphaUnitAlphaEnabledState[0])) { if (!m_useRectangleARB) glEnable(GL_TEXTURE_2D); if (InternalConfig.EXT_compiled_vertex_array) { glEnableClientState(GL_TEXTURE_COORD_ARRAY); glTexCoordPointer(4, GL_FLOAT, 0, &OGLRender.TTexture[0]); } } } else { if (InternalConfig.EXT_secondary_color) { glDisable(GL_COLOR_SUM_EXT); if (InternalConfig.EXT_secondary_color && OpenGL.ColorAlphaUnit2 == NULL) { glDisableClientState(GL_SECONDARY_COLOR_ARRAY_EXT); glSecondaryColorPointerEXT(3, GL_FLOAT, 0, NULL); glReportError(); } glReportError(); } if (OpenGL.Texture == false) { // GL_RECTANGLE_ARB overrides GL_RECTANGLE_2D if (!m_useRectangleARB) glEnable(GL_TEXTURE_2D); if (InternalConfig.EXT_compiled_vertex_array) { glEnableClientState(GL_TEXTURE_COORD_ARRAY); glTexCoordPointer(4, GL_FLOAT, 0, &OGLRender.TTexture[0]); } } } if (m_useRectangleARB) { glEnable(GL_TEXTURE_RECTANGLE_ARB); } glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_MODULATE); glReportError(); } // Turn off fog (unsupported for normal writes, undefined for pixelpipelined writes) if (OpenGL.FogTextureUnit) { // @todo To be tested and reviewed if (OpenGL.Fog || Glide.State.ColorCombineInvert || Glide.State.AlphaInvert) { glActiveTextureARB(OpenGL.FogTextureUnit); if (InternalConfig.EXT_compiled_vertex_array) { glClientActiveTextureARB(OpenGL.FogTextureUnit); glDisableClientState(GL_TEXTURE_COORD_ARRAY); glTexCoordPointer(4, GL_FLOAT, 0, NULL); glClientActiveTextureARB(OpenGL.ColorAlphaUnit1); } glDisable(GL_TEXTURE_2D); glActiveTextureARB(OpenGL.ColorAlphaUnit1); glReportError(); } } if (OpenGL.Fog && InternalConfig.FogMode != OpenGLideFogEmulation_None && InternalConfig.FogMode != 
OpenGLideFogEmulation_EnvCombine) { glDisable(GL_FOG); glReportError(); } } void Framebuffer::restore_gl_state(bool pixelpipeline) { #ifdef OGL_FRAMEBUFFER GlideMsg( "Framebuffer::restore_gl_state(%d)\n", pixelpipeline); #endif glReportErrors("Framebuffer::restore_gl_state"); // Restore the cull mode switch (Glide.State.CullMode) { case GR_CULL_DISABLE: break; case GR_CULL_NEGATIVE: case GR_CULL_POSITIVE: glEnable(GL_CULL_FACE); break; } if (InternalConfig.EXT_clip_volume_hint && OpenGL.ClipVerticesEnabledState) { glHint(GL_CLIP_VOLUME_CLIPPING_HINT_EXT, GL_NICEST); } // Restore the clipping window glMatrixMode(GL_PROJECTION); glLoadIdentity(); if ( Glide.State.OriginInformation == GR_ORIGIN_LOWER_LEFT ) { glOrtho(Glide.State.ClipMinX, Glide.State.ClipMaxX, Glide.State.ClipMinY, Glide.State.ClipMaxY, OpenGL.ZNear, OpenGL.ZFar); glViewport(OpenGL.OriginX + OpenGL.ClipMinX, OpenGL.OriginY + OpenGL.ClipMinY, OpenGL.ClipMaxX - OpenGL.ClipMinX, OpenGL.ClipMaxY - OpenGL.ClipMinY); } else { glOrtho(Glide.State.ClipMinX, Glide.State.ClipMaxX, Glide.State.ClipMaxY, Glide.State.ClipMinY, OpenGL.ZNear, OpenGL.ZFar); glViewport(OpenGL.OriginX + OpenGL.ClipMinX, OpenGL.OriginY + OpenGL.WindowHeight - OpenGL.ClipMaxY, OpenGL.ClipMaxX - OpenGL.ClipMinX, OpenGL.ClipMaxY - OpenGL.ClipMinY); } // The scissor rectangle is not reset, because scissor mode // is only enabled when clearing the buffer glMatrixMode( GL_MODELVIEW ); glReportError(); // Restore fog const bool enable_fog_texture_unit = OpenGL.FogTextureUnit && ((OpenGL.Fog && InternalConfig.FogMode == OpenGLideFogEmulation_EnvCombine) || Glide.State.ColorCombineInvert || Glide.State.AlphaInvert); if (enable_fog_texture_unit) { glActiveTextureARB(OpenGL.FogTextureUnit); glEnable(GL_TEXTURE_2D); // We're not using glDrawArrays to render the frame buffer, // but without disabling the client state the next texture drawn // by RenderDrawTriangles would get the wrong coordinates. 
// Can be observed in Carmageddon: The sky texture is rendered "too high" if (InternalConfig.EXT_compiled_vertex_array) { glClientActiveTextureARB(OpenGL.FogTextureUnit); glEnableClientState(GL_TEXTURE_COORD_ARRAY); glTexCoordPointer(1, GL_FLOAT, 0, &OGLRender.TFog[0]); glClientActiveTextureARB(OpenGL.ColorAlphaUnit1); } glActiveTextureARB(OpenGL.ColorAlphaUnit1); glReportError(); } if (OpenGL.Fog && InternalConfig.FogMode != OpenGLideFogEmulation_None && InternalConfig.FogMode != OpenGLideFogEmulation_EnvCombine) { glEnable(GL_FOG); glReportError(); } if (pixelpipeline) { if (OpenGL.ColorAlphaUnit2) { // restore current values if (m_bRestoreColorCombine) { if (Glide.State.ColorCombineLocal == GR_COMBINE_LOCAL_PIXELPIPELINE) Glide.State.ColorCombineLocal = GR_COMBINE_LOCAL_ITERATED; if (Glide.State.ColorCombineOther == GR_COMBINE_OTHER_PIXELPIPELINE) Glide.State.ColorCombineOther = GR_COMBINE_OTHER_ITERATED; SetColorCombineState(); } if(m_bRestoreAlphaCombine) { if (Glide.State.AlphaLocal == GR_COMBINE_LOCAL_PIXELPIPELINE) Glide.State.AlphaLocal = GR_COMBINE_LOCAL_ITERATED; if (Glide.State.AlphaOther == GR_COMBINE_OTHER_PIXELPIPELINE) Glide.State.AlphaOther = GR_COMBINE_LOCAL_ITERATED; SetAlphaCombineState(); } // GL_RECTANGLE_ARB overrides GL_TEXTURE_2D if (m_useRectangleARB) { // Disable texture rectangle (for those units it has been enabled) const GLenum textureTarget = GL_TEXTURE_RECTANGLE_ARB; const bool disableColorAlphaTextureUnit1 = OpenGL.ColorAlphaUnitColorEnabledState[0] || OpenGL.ColorAlphaUnitAlphaEnabledState[0]; if (disableColorAlphaTextureUnit1) { glDisable(textureTarget); } if (OpenGL.ColorAlphaUnit2) { const bool disableColorAlphaTextureUnit2 = OpenGL.ColorAlphaUnitColorEnabledState[1] || OpenGL.ColorAlphaUnitAlphaEnabledState[1]; if (disableColorAlphaTextureUnit2) { glActiveTextureARB(OpenGL.ColorAlphaUnit2); glDisable(textureTarget); glActiveTextureARB(OpenGL.ColorAlphaUnit1); } } glReportError(); // The client texture state is already setup 
correctly since we just // have to adjust the texture rectangle state to the texture_2d state } switch (m_framebuffer->WriteMode) { case GR_LFBWRITEMODE_565: case GR_LFBWRITEMODE_888: SetChromaKeyAndAlphaState(); break; } } else // simple render mode { if ( InternalConfig.EXT_secondary_color ) { glEnable(GL_COLOR_SUM_EXT); if (InternalConfig.EXT_secondary_color && OpenGL.ColorAlphaUnit2 == NULL) { glEnableClientState(GL_SECONDARY_COLOR_ARRAY_EXT); glSecondaryColorPointerEXT(3, GL_FLOAT, 4 * sizeof(GLfloat), &OGLRender.TColor2[0]); } glReportError(); } if (OpenGL.Texture == false) { // GL_RECTANGLE_ARB overrides GL_RECTANGLE_2D if (InternalConfig.EXT_compiled_vertex_array) { glDisableClientState(GL_TEXTURE_COORD_ARRAY); glTexCoordPointer(4, GL_FLOAT, 0, NULL); } if (!m_useRectangleARB) glDisable(GL_TEXTURE_2D); } if (m_useRectangleARB) { glDisable(GL_TEXTURE_RECTANGLE_ARB); } // Restore the previous texture environment SetColorCombineState(); } } else { // restore depth state if (OpenGL.DepthBufferWritting) { glDepthMask(true); } if (Glide.State.DepthBufferMode != GR_DEPTHBUFFER_DISABLE) { glEnable(GL_DEPTH_TEST); } // Restore colormask const bool rgb = Glide.State.ColorMask; glColorMask(rgb, rgb, rgb, Glide.State.AlphaMask); // Blend if (OpenGL.Blend) { glEnable(GL_BLEND); } glReportError(); // texture units if (OpenGL.ColorAlphaUnit2) { const bool enable_coloralpha_texture_unit_2 = OpenGL.ColorAlphaUnitColorEnabledState[1] || OpenGL.ColorAlphaUnitAlphaEnabledState[1]; if (enable_coloralpha_texture_unit_2) { glActiveTextureARB(OpenGL.ColorAlphaUnit2); glEnable(GL_TEXTURE_2D); if (InternalConfig.EXT_compiled_vertex_array) { glClientActiveTextureARB(OpenGL.ColorAlphaUnit2); glEnableClientState(GL_TEXTURE_COORD_ARRAY); glTexCoordPointer(4, GL_FLOAT, 0, &OGLRender.TTexture[0]); glClientActiveTextureARB(OpenGL.ColorAlphaUnit1); } glActiveTextureARB(OpenGL.ColorAlphaUnit1); } if (!(OpenGL.ColorAlphaUnitColorEnabledState[0] || OpenGL.ColorAlphaUnitAlphaEnabledState[0])) { 
if (InternalConfig.EXT_compiled_vertex_array) { glDisableClientState(GL_TEXTURE_COORD_ARRAY); glTexCoordPointer( 4, GL_FLOAT, 0, NULL ); } if (!m_useRectangleARB) glDisable(GL_TEXTURE_2D); } // Restore the previous texture environment glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_COMBINE_EXT); } else { if ( InternalConfig.EXT_secondary_color ) { glEnable(GL_COLOR_SUM_EXT); if (InternalConfig.EXT_secondary_color && OpenGL.ColorAlphaUnit2 == NULL) { glEnableClientState(GL_SECONDARY_COLOR_ARRAY_EXT); glSecondaryColorPointerEXT(3, GL_FLOAT, 4 * sizeof(GLfloat), &OGLRender.TColor2[0]); } glReportError(); } if (OpenGL.Texture == false) { if (InternalConfig.EXT_compiled_vertex_array) { glDisableClientState(GL_TEXTURE_COORD_ARRAY); glTexCoordPointer(4, GL_FLOAT, 0, NULL); } // GL_RECTANGLE_ARB overrides GL_TEXTURE_2D and this // has already been skipped in setState if (!m_useRectangleARB) glDisable(GL_TEXTURE_2D); } // Restore the previous texture environment SetColorCombineState(); } // GL_RECTANGLE_ARB overrides GL_TEXTURE_2D if (m_useRectangleARB) { glDisable(GL_TEXTURE_RECTANGLE_ARB); } glReportError(); // This must be a forced update because GlideState changes of ChromaKeyMode // that don't change the corresponding GL-state are filtered out ForceChromaKeyAndAlphaStateUpdate(); } glReportError(); VERIFY_ACTIVE_TEXTURE_UNIT(OpenGL.ColorAlphaUnit1); VERIFY_TEXTURE_ENABLED_STATE(); } bool Framebuffer::draw(const tilesize* tilesizetable, bool pixelpipeline) { #ifdef OGL_FRAMEBUFFER GlideMsg( "Framebuffer::draw(---, %d)\n", pixelpipeline); #endif glReportErrors("Framebuffer::draw()"); bool init_second_textureunit = pixelpipeline && OpenGL.ColorAlphaUnit2; FxU32* texbuffer = reinterpret_cast(m_texbuffer->Address); // Render the tiles GLint n = 0; GLint x; GLint y = 0; GLint y_step; for(int w = 0; y < m_height && w < MaxTiles; w++, y += y_step) { y_step = tilesizetable[w].y; x = 0; GLint x_step; for(int v = 0; x < m_width && v < MaxTiles; v++, x += x_step) { x_step = 
tilesizetable[w].x[v]; // Use unique (but always the same) name for each texture in order // to maintain the size and avoid vram memory reallocation GLint texturename = m_textureNames[n]; const TileUpdateState updateState = createTextureData(texbuffer, x, y, x_step, y_step, n); if (updateState != TileUpdateState_TileEmpty) { #ifdef DEBUG_TILE_RENDERING unsigned int color; if (updateState == TileUpdateState_TileDownloadToGPU) // edges of downloaded tiles are red color = 0xff0000ff; else // edges of rendered tiles are cyan color = 0x00ffffff; ((long*) texbuffer)[0] = ((long*) texbuffer)[1] = ((long*) texbuffer)[x_step] = ((long*) texbuffer)[x_step -1] = ((long*) texbuffer)[x_step -2] = ((long*) texbuffer)[2 * x_step -1] = ((long*) texbuffer)[(x_step - 1) * y_step] = ((long*) texbuffer)[(x_step - 1) * y_step + 1] = ((long*) texbuffer)[(x_step - 2) * y_step] = ((long*) texbuffer)[x_step * y_step - 1] = ((long*) texbuffer)[x_step * y_step - 2] = ((long*) texbuffer)[x_step * (y_step - 1) - 1] = color; #endif glBindTexture(GL_TEXTURE_2D, texturename); if (init_second_textureunit) { glActiveTextureARB(OpenGL.ColorAlphaUnit2); glBindTexture(GL_TEXTURE_2D, texturename); glActiveTextureARB(OpenGL.ColorAlphaUnit1); } #ifndef DEBUG_TILE_RENDERING if (updateState == TileUpdateState_TileDownloadToGPU // || InternalConfig.APPLE_client_storage == false ) { glTexImage2D(GL_TEXTURE_2D, 0, m_glInternalFormat, x_step, y_step, 0, m_glFormat, m_glType, texbuffer); glReportError(); } #endif static struct { const GLfloat bl[4]; const GLfloat br[4]; const GLfloat tr[4]; const GLfloat tl[4]; } texcoords = { {0.0, 0.0, 1.0, 1.0}, {1.0, 0.0, 1.0, 1.0}, {1.0, 1.0, 1.0, 1.0}, {0.0, 1.0, 1.0, 1.0} }; glBegin(GL_QUADS); // counter clockwise glColor3f(1.0, 1.0, 1.0); glTexCoord4fv(&texcoords.bl[0]); if (init_second_textureunit) { glMultiTexCoord4fvARB(OpenGL.ColorAlphaUnit2, &texcoords.bl[0]); } glVertex3f(x, y, m_glDepth); glColor3f(1.0, 1.0, 1.0); glTexCoord4fv(&texcoords.br[0]); if 
(init_second_textureunit) { glMultiTexCoord4fvARB(OpenGL.ColorAlphaUnit2, &texcoords.br[0]); } glVertex3f(x + x_step, y, m_glDepth); glColor3f(1.0, 1.0, 1.0); glTexCoord4fv(&texcoords.tr[0]); if (init_second_textureunit) { glMultiTexCoord4fvARB(OpenGL.ColorAlphaUnit2, &texcoords.tr[0]); } glVertex3f(x + x_step, y + y_step, m_glDepth); glColor3f(1.0, 1.0, 1.0); glTexCoord4fv(&texcoords.tl[0]); if (init_second_textureunit) { glMultiTexCoord4fvARB(OpenGL.ColorAlphaUnit2, &texcoords.tl[0]); } glVertex3f(x, y + y_step , m_glDepth); glEnd(); glReportError(); // Advance to the next texbuffer location texbuffer += x_step * y_step; } n ++; } } s_Framebuffer.SetRenderBufferChanged(); return y == m_height && x == m_width; } bool Framebuffer::drawCompiledVertexArrays(const tilesize* tilesizetable, int vertexarrayindex, int tilecount, bool pixelpipeline) { #ifdef OGL_FRAMEBUFFER GlideMsg( "Framebuffer::draw(---, %d)\n", pixelpipeline); #endif glReportErrors("Framebuffer::drawCompiledVertexArrays()"); // Finish rendering RenderUnlockArrays(); // @todo: I don't think the command is necessary // but it fixes garbage in top-left corner in Carmageddon map mode glFlush(); glReportError(); // Transfer coords to VRAM glLockArraysEXT(vertexarrayindex * 3, tilecount * 6); glReportError(); OGLRender.BufferLocked = true; const bool init_second_textureunit = pixelpipeline && (OpenGL.ColorAlphaUnitColorEnabledState[1] || OpenGL.ColorAlphaUnitAlphaEnabledState[1]); FxU32* texbuffer = reinterpret_cast(m_texbuffer->Address); // Render the tiles GLint n = 0; GLint x; GLint y = 0; GLint y_step; const GLenum textureTarget = m_useRectangleARB ? 
GL_TEXTURE_RECTANGLE_ARB : GL_TEXTURE_2D; for(int w = 0; y < m_height && w < MaxTiles; w++, y += y_step) { y_step = tilesizetable[w].y; x = 0; GLint x_step; for(int v = 0; x < m_width && v < MaxTiles; v++, x += x_step) { x_step = tilesizetable[w].x[v]; const TileUpdateState updateState = createTextureData(texbuffer, x, y, x_step, y_step, n); if (updateState != TileUpdateState_TileEmpty) { #ifdef DEBUG_TILE_RENDERING unsigned int color; if (updateState == TileUpdateState_TileDownloadToGPU) // edges of downloaded tiles are red color = 0xff0000ff; else // edges of rendered tiles are cyan color = 0x00ffffff; ((long*) texbuffer)[0] = ((long*) texbuffer)[1] = ((long*) texbuffer)[x_step] = ((long*) texbuffer)[x_step -1] = ((long*) texbuffer)[x_step -2] = ((long*) texbuffer)[2 * x_step -1] = ((long*) texbuffer)[x_step * (y_step - 2)] = ((long*) texbuffer)[x_step * (y_step - 1)] = ((long*) texbuffer)[x_step * (y_step - 1) + 1] = ((long*) texbuffer)[x_step * y_step - 1] = ((long*) texbuffer)[x_step * y_step - 2] = ((long*) texbuffer)[x_step * (y_step - 1) - 1] = color; #endif // Use unique (but always the same) name for each // texture in order to be able to reuse tile data const GLint texturename = m_textureNames[n]; // The texture rectangle is better suited for video, // which is close to a framebuffer if (init_second_textureunit) { glActiveTextureARB(OpenGL.ColorAlphaUnit2); glReportError(); glBindTexture(textureTarget, texturename); glReportError(); glActiveTextureARB(OpenGL.ColorAlphaUnit1); glReportError(); } glBindTexture(textureTarget, texturename); glReportError(); #ifndef DEBUG_TILE_RENDERING if (updateState == TileUpdateState_TileDownloadToGPU // || InternalConfig.APPLE_client_storage == false ) #endif { glTexImage2D(textureTarget, 0, m_glInternalFormat, x_step, y_step, 0, m_glFormat, m_glType, texbuffer); glReportError(); } // Draw the tile glDrawArrays(GL_TRIANGLES, vertexarrayindex * 3 + n * 6, 6); glReportError(); // Advance to the next texbuffer location 
texbuffer += x_step * y_step; } n++; } } s_Framebuffer.SetRenderBufferChanged(); return y == m_height && x == m_width; } int Framebuffer::buildVertexArrays(const tilesize* tilesizetable, int vertexarrayindex) { // Compute coordinates for compiled vertex arrays TColorStruct* pC = &OGLRender.TColor[vertexarrayindex]; TVertexStruct* pV = &OGLRender.TVertex[vertexarrayindex]; TTextureStruct* pTS = &OGLRender.TTexture[vertexarrayindex]; int n = 0; GLint y = 0; GLint y_step; for(int w = 0; y < m_height && w < MaxTiles; w++, y += y_step) { y_step = tilesizetable[w].y; GLint x = 0; GLint x_step; for(int v = 0; x < m_width && v < MaxTiles; v++, x += x_step) { x_step = tilesizetable[w].x[v]; // Write coordinates counter clockwise into render buffers pC->ar = pC->ag = pC->ab = pC->br = pC->bg = pC->bb = pC->cr = pC->cg = pC->cb = pC->aa = pC->ba = pC->ca = 1.0f; pV->ax = x; pV->ay = y; pV->bx = x + x_step; pV->by = y; pV->cx = x + x_step; pV->cy = y + y_step; pV->az = pV->bz = pV->cz = m_glDepth; pTS->as = 0.0; pTS->at = 0.0; pTS->bs = m_useRectangleARB ? x_step : 1.0f; pTS->bt = 0.0; pTS->cs = m_useRectangleARB ? x_step : 1.0f; pTS->ct = m_useRectangleARB ? y_step : 1.0f; pTS->aq = pTS->bq = pTS->cq = 0.0f; pTS->aoow = pTS->boow = pTS->coow = 1.0f; pC++; pV++; pTS++; pC->ar = pC->ag = pC->ab = pC->br = pC->bg = pC->bb = pC->cr = pC->cg = pC->cb = pC->aa = pC->ba = pC->ca = 1.0f; pV->ax = x + x_step; pV->ay = y + y_step; pV->bx = x; pV->by = y + y_step; pV->cx = x; pV->cy = y; pV->az = pV->bz = pV->cz = m_glDepth; pTS->as = m_useRectangleARB ? x_step : 1.0f; pTS->at = m_useRectangleARB ? y_step : 1.0f; pTS->bs = 0.0; pTS->bt = m_useRectangleARB ? 
y_step : 1.0f; pTS->cs = 0.0; pTS->ct = 0.0; pTS->aq = pTS->bq = pTS->cq = 0.0f; pTS->aoow = pTS->boow = pTS->coow = 1.0f; pC++; pV++; pTS++; n++; } } return n; } #ifdef __ALTIVEC__ // altivec code inline Framebuffer::TileUpdateState Framebuffer::Convert565Kto8888_AV(FxU16* buffer1, register FxU32* buffer2, register FxU32 width, register FxU32 height, register FxU32 stride, int checksumIndex) { const vector bool short chromakey_565_av = m_ChromaKey.Vector; const int width_av = width >> 3; // 8 16-bit words const int stride_av = stride >> 3; // 8 16-bit words const int jump_av = width_av + stride_av; vector bool short* src_av = (vector bool short*) buffer1; // Setup channel 0 for reading one row of 565 ushorts from src into the L1 cache // This isn't read again soon, just written back once so we can bypass L2 cache const int src_control = (((width_av >> 4) & 0x1f) << 3) + (1 << 8) + (stride_av << 16); vec_dstt(src_av, src_control, 0); int h = height; // loop through the src to check whether anything has to be copied at all vector bool short* stop_zero_av = &src_av[width_av]; do { do { const vector bool short pixels_565_av = *src_av; if (!vec_all_eq(pixels_565_av, chromakey_565_av)) goto create_8888_texture_1_av; // Test clear first before jumping to create_8888_texture_1_av src_av++; } while (src_av != stop_zero_av); src_av += stride_av; // Update channel 0 to prefetch the next row into the L1 cache vec_dstt(src_av, src_control, 0); stop_zero_av += jump_av; } while (--h); return TileUpdateState_TileEmpty; create_8888_texture_1_av: // Delete dst up to the last chromakey entry in src stop_zero_av = src_av; src_av = (vector bool short*) buffer1; vector unsigned long* dst_av = (vector unsigned long*) buffer2; const vector unsigned long null_av = vec_splat_u32(0); // We're just writing to dst, no reading must occur, and starting a prefetch is a bad idea h = height; vector bool short* stop_av = &src_av[width_av]; do { do { if (src_av == stop_zero_av) goto 
create_8888_texture_2_av; // Test clear first // clear cacheline to prevent it from being read-in (32 bytes = 2 altivec writes) // we're just clearing the cache line since we're going to write zeros anyway __dcbz(dst_av, 0); dst_av += 2; src_av++; } while (src_av != stop_av); src_av += stride_av; stop_av += jump_av; } while (--h); return TileUpdateState_TileEmpty; create_8888_texture_2_av: // Build permute vector for storing high/lo 565 pixels into RgbxA // - results in R = r565+ggg, G = gggbbbbb, B = 0, A = glAlpha // -> good for comparison, green and blue are converted afterwards const vector unsigned char permute_hi_av = {0x00, 0x01, 0x12, 0x13, 0x02, 0x03, 0x16, 0x17, 0x04, 0x05, 0x1a, 0x1b, 0x06, 0x07, 0x1e, 0x1f}; // Computing the permute table just takes 2 instructions instead of 1 instruction + 4 memory reads const vector unsigned char permute_lo_av = vec_or(permute_hi_av, vec_splat_u8(8)); // const vector unsigned long alpha_8888_av = {m_glAlpha, m_glAlpha, m_glAlpha, m_glAlpha}; const vector unsigned long alpha_8888_av = m_glAlpha.Vector; // Build chromakey and alpha RgbxA vector const vector unsigned long chromakey_RgbxA_av = vec_perm((const vector unsigned long) chromakey_565_av, alpha_8888_av, permute_lo_av); // Constants const vector unsigned long const_3_av = vec_splat_u32(3); const vector unsigned long const_5_av = vec_splat_u32(5); // R5G6B500AA color masks const vector unsigned long mask_8888_ra = {0xf80000ff, 0xf80000ff, 0xf80000ff, 0xf80000ff}; const vector unsigned long mask_8888_g = {0x07e00000, 0x07e00000, 0x07e00000, 0x07e00000}; // Computing the mask just takes 2 instructions instead of 1 instruction + 4 memory reads const vector unsigned long mask_8888_b = vec_sr(mask_8888_ra, vec_splat_u32(11)); vector unsigned long pixels_8888_src_av; vector bool long mask; vector unsigned long p; vector unsigned long q; vector unsigned long pixels_8888_dst_av; // Checksum the tile vector unsigned long c = null_av; vector unsigned long d; // Continue the 
loop and convert pixels from 565 to 8888 vec_dstt(src_av, src_control, 0); do { do { const vector unsigned long pixels_565_av = (const vector unsigned long) (*src_av); // tile checksum part 1 d = vec_sr(c, const_5_av); c = vec_add(c, pixels_565_av); // restore chroma key for next update *src_av++ = chromakey_565_av; // hi-word pixels pixels_8888_src_av = vec_perm(pixels_565_av, alpha_8888_av, permute_hi_av); mask = vec_cmpeq(pixels_8888_src_av, chromakey_RgbxA_av); // Keep red and alpha component pixels_8888_dst_av = vec_and(pixels_8888_src_av, mask_8888_ra); // add green component p = vec_and(pixels_8888_src_av, mask_8888_g); q = vec_sr(p, const_3_av); pixels_8888_dst_av = vec_or(pixels_8888_dst_av, q); // add blue component p = vec_and(pixels_8888_src_av, mask_8888_b); q = vec_sr(p, const_5_av); pixels_8888_dst_av = vec_or(pixels_8888_dst_av, q); // We're just writing to dst and thus can clear the cacheline in order // to avoid the read-in from system memory (32 bytes = 2 altivec writes) // Note: This is a G4 hack, but on a G5 the code will be fast enough anyway __dcbz(dst_av, 0); // Select between pixels and chromakey *dst_av++ = vec_sel(pixels_8888_dst_av, null_av, mask); // tile checksum part 2 c = vec_xor(c, d); // lo-word pixels pixels_8888_src_av = vec_perm(pixels_565_av, alpha_8888_av, permute_lo_av); mask = vec_cmpeq(pixels_8888_src_av, chromakey_RgbxA_av); // Keep red and alpha component pixels_8888_dst_av = vec_and(pixels_8888_src_av, mask_8888_ra); // add green component p = vec_and(pixels_8888_src_av, mask_8888_g); q = vec_sr(p, const_3_av); pixels_8888_dst_av = vec_or(pixels_8888_dst_av, q); // add blue component p = vec_and(pixels_8888_src_av, mask_8888_b); q = vec_sr(p, const_5_av); pixels_8888_dst_av = vec_or(pixels_8888_dst_av, q); // Select between pixels and chromakey *dst_av++ = vec_sel(pixels_8888_dst_av, null_av, mask); } while (src_av != stop_av); src_av += stride_av; vec_dstt(src_av, src_control, 0); stop_av += jump_av; } while (--h); // 
Skip downloading tile data to the gpu if the content hasn't changed if (vec_all_eq(c, m_tileChecksums[checksumIndex])) return TileUpdateState_TileDrawOnly; // The tile has been converted, been changed and must be downloaded to the gpu m_tileChecksums[checksumIndex] = c; return TileUpdateState_TileDownloadToGPU; } #endif // Non-Altivec-code inline Framebuffer::TileUpdateState Framebuffer::Convert565Kto8888(FxU16* buffer1, register FxU32* buffer2, register FxU32 width, register FxU32 height, register FxU32 stride) { // Process two pixels at once const register unsigned long chromakey1 = m_ChromaKey.Scalar << 16; const register unsigned long chromakey2 = m_ChromaKey.Scalar; const register unsigned long chromakey12 = chromakey1 | chromakey2; width = width >> 1; stride = stride >> 1; register unsigned long pixel; register unsigned long* stop; register unsigned long jump = width + stride; register unsigned long* src = reinterpret_cast(buffer1); // check if tile must be processed in advance // to avoid useless writes to main memory // The tile should at least fit into the second level cache // so reading it again wouldn't hurt as much as doing needless writes register unsigned long h = height; stop = &src[width]; do { do { pixel = *src++; if (pixel != chromakey12) goto create_8888_texture; } while (src != stop); src += stride; stop += jump; } while (--h); return TileUpdateState_TileEmpty; create_8888_texture: const register unsigned long alpha = m_glAlpha.Scalar; const register unsigned long null = 0x00000000; const register unsigned long mask_pixel1 = 0xffff0000; const register unsigned long mask_pixel2 = 0x0000ffff; const register unsigned long mask_pixel1_r = 0xf8000000; const register unsigned long mask_pixel1_g = 0x07e00000; const register unsigned long mask_pixel1_b = 0x001f0000; const register unsigned long mask_pixel2_r = 0x0000f800; const register unsigned long mask_pixel2_g = 0x000007e0; const register unsigned long mask_pixel2_b = 0x0000001f; src = 
reinterpret_cast(buffer1); stop = &src[width]; do { do { // GL_RGBA pixel = *src; if (pixel == chromakey12) { *buffer2++ = null; *buffer2++ = null; } else { *src = chromakey12; if ( (pixel & mask_pixel1) == chromakey1) { *buffer2++ = null; } else { *buffer2++ = ( alpha | // A ( pixel & mask_pixel1_b ) >> 5 | // B ( pixel & mask_pixel1_g ) >> 3 | // G ( pixel & mask_pixel1_r )); // R } if ( (pixel & mask_pixel2) == chromakey2) { *buffer2++ = null; } else { *buffer2++ = ( alpha | // A ( pixel & mask_pixel2_b ) << 11 | // B ( pixel & mask_pixel2_g ) << 13 | // G ( pixel & mask_pixel2_r ) << 16); // R } } src++; } while (src != stop); src += stride; stop += jump; } while (--height); return TileUpdateState_TileDownloadToGPU; } inline Framebuffer::TileUpdateState Framebuffer::Convert1555Kto8888(FxU16* buffer1, register FxU32* buffer2, register FxU32 width, register FxU32 height, register FxU32 stride) { // Process two pixels at once register unsigned long pixel; register unsigned long x; register unsigned long* src = reinterpret_cast(buffer1); const unsigned long null = 0x00000000; register unsigned long dstpixel = null; const register unsigned long chromakey1 = m_ChromaKey.Scalar << 16; const register unsigned long chromakey2 = m_ChromaKey.Scalar; const register unsigned long chromakey12 = chromakey1 | chromakey2; const register unsigned long alpha = m_glAlpha.Scalar; const register unsigned long mask_pixel1 = 0xffff0000; const register unsigned long mask_pixel2 = 0x0000ffff; const register unsigned long mask_pixel1_r = 0x7c000000; const register unsigned long mask_pixel1_g = 0x03e00000; const register unsigned long mask_pixel1_b = 0x001f0000; const register unsigned long mask_pixel2_r = 0x00007c00; const register unsigned long mask_pixel2_g = 0x000003e0; const register unsigned long mask_pixel2_b = 0x0000001f; width >>= 1; stride >>= 1; do { x = width; do { // GL_RGBA pixel = *src; if (pixel == chromakey12) { *buffer2++ = null; *buffer2++ = null; } else { *src = 
chromakey12; if ( (pixel & mask_pixel1) == chromakey1) { *buffer2++ = null; } else { dstpixel = ( alpha | // A ( pixel & mask_pixel1_b ) >> 5 | // B ( pixel & mask_pixel1_g ) >> 2 | // G ( pixel & mask_pixel1_r ) << 1); // R *buffer2++ = dstpixel; } if ( (pixel & mask_pixel2) == chromakey2) { *buffer2++ = null; } else { dstpixel = ( alpha | // A ( pixel & mask_pixel2_b ) << 11 | // B ( pixel & mask_pixel2_g ) << 14 | // G ( pixel & mask_pixel2_r ) << 17); // R *buffer2++ = dstpixel; } } src++; } while (--x); src += stride; } while (--height); return dstpixel != null ? TileUpdateState_TileDownloadToGPU : TileUpdateState_TileEmpty; } inline Framebuffer::TileUpdateState Framebuffer::ConvertARGB8888Kto8888(FxU32* buffer1, register FxU32* buffer2, register FxU32 width, register FxU32 height, register FxU32 stride) { // Process two pixels at once const register unsigned long chromakey = m_ChromaKey.Scalar || (m_ChromaKey.Scalar << 16); register unsigned long pixel; register unsigned long* stop; register unsigned jump = width + stride; register unsigned long* src = buffer1; // check if tile must be processed in advance // to avoid useless writes to main memory // The tile should at least fit into the second level cache // so reading it again wouldn't hurt as much as doing needless writes register unsigned long h = height; stop = &src[width]; do { do { pixel = *src++; if (pixel != chromakey) goto create_8888_texture; } while (src != stop); src += stride; stop += jump; } while (--h); return TileUpdateState_TileEmpty; create_8888_texture: const register unsigned long alpha = m_glAlpha.Scalar; src = buffer1; stop = &src[width]; do { do { // GL_RGBA pixel = *src; if (pixel == chromakey) { *buffer2++ = 0; } else { *src = chromakey; *buffer2++ = (pixel << 8) | alpha; } src++; } while (src != stop); src += stride; stop += jump; } while (--height); return TileUpdateState_TileDownloadToGPU; } inline Framebuffer::TileUpdateState Framebuffer::createTextureData(FxU32* texbuffer, FxU32 
x, FxU32 y, FxU32 x_step, FxU32 y_step, int checksumIndex) { FxU32 stride = (m_width - x_step); FxU32 index = x + y * m_width; if (m_framebuffer->WriteMode == GR_LFBWRITEMODE_565) { #ifdef __ALTIVEC__ if (UserConfig.VectorUnitType == OpenGLideVectorUnitType_Altivec) { #ifdef OGL_FRAMEBUFFER const vector unsigned long c = m_tileChecksums[checksumIndex]; #endif TileUpdateState state = Convert565Kto8888_AV(&m_framebuffer->Address[index], texbuffer, x_step, y_step, stride, checksumIndex); #ifdef OGL_FRAMEBUFFER GlideMsg("Tile %d (%d,%d)-(%d,%d) update state is %s (%vlx)->(%vlx)\n", checksumIndex, x, y, x_step, y_step, (state ==TileUpdateState_TileDownloadToGPU) ? "DownLoadToGPU" : ((state == TileUpdateState_TileDrawOnly) ? "DrawOnly" : "TileEmpty"), c, m_tileChecksums[checksumIndex]); #endif return state; } else #endif return Convert565Kto8888(&m_framebuffer->Address[index], texbuffer, x_step, y_step, stride); } else if (m_framebuffer->WriteMode == GR_LFBWRITEMODE_1555) { return Convert1555Kto8888(&m_framebuffer->Address[index], texbuffer, x_step, y_step, stride); } else if (m_framebuffer->WriteMode == GR_LFBWRITEMODE_888) { FxU32* framebuffer = &reinterpret_cast(m_framebuffer->Address)[index]; return ConvertARGB8888Kto8888(framebuffer, texbuffer, x_step, y_step, stride); } else { return TileUpdateState_TileEmpty; } } \ No newline at end of file diff --git a/MacGLide/OpenGLide/Framebuffer.h b/MacGLide/OpenGLide/Framebuffer.h index 23aadf1..8ba2982 100644 --- a/MacGLide/OpenGLide/Framebuffer.h +++ b/MacGLide/OpenGLide/Framebuffer.h @@ -46,25 +46,26 @@ protected: int buildVertexArrays(const tilesize* tilesizetable, int vertexarrayindex); void set_gl_state(bool pixelpipeline); void restore_gl_state(bool pixelpipeline); + // Pixel conversion enum TileUpdateState { TileUpdateState_TileEmpty = false, TileUpdateState_TileDownloadToGPU = 1, // true, TileUpdateState_TileDrawOnly = -1 // -true }; - static const int m_tileCount = MaxTiles * MaxTiles; - GLuint 
m_textureNames[m_tileCount]; + static const int s_maxTiles = MaxTiles * MaxTiles; + GLuint m_textureNames[s_maxTiles]; inline TileUpdateState createTextureData(FxU32* texbuffer, FxU32 x, FxU32 y, FxU32 x_step, FxU32 y_step, int checksumIndex); inline TileUpdateState Convert565Kto8888(FxU16* buffer1, FxU32* buffer2, register FxU32 width, register FxU32 height, register FxU32 stride); #ifdef __ALTIVEC__ inline TileUpdateState Convert565Kto8888_AV(FxU16* buffer1, FxU32* buffer2, register FxU32 width, register FxU32 height, register FxU32 stride, int checksumIndex); - vector unsigned long m_tileChecksums[m_tileCount]; + vector unsigned long m_tileChecksums[s_maxTiles]; #endif inline TileUpdateState Convert1555Kto8888(FxU16* buffer1, register FxU32* buffer2, FxU32 register width, register FxU32 height, register FxU32 stride); inline TileUpdateState ConvertARGB8888Kto8888(FxU32* buffer1, register FxU32* buffer2, FxU32 register width, register FxU32 height, register FxU32 stride); - bool m_use_client_storage; bool m_useRectangleARB; bool m_must_clear_buffer; + // Format GrOriginLocation_t m_origin; GLint m_glInternalFormat; GLint m_glFormat; @@ -72,18 +73,22 @@ protected: bool m_format_valid; BufferStruct* m_framebuffer; BufferStruct* m_texbuffer; + // Dimensions FxU32 m_width; FxU32 m_height; GLint m_x_step_start; GLint m_y_step_start; GLint m_x_step_start_opaque; GLint m_y_step_start_opaque; + // Tiles + inline int getTileCount() const; tilesize m_tilesizes[MaxTiles]; int m_tilesizesCount; int m_tilesizesVertexArrayIndex; const tilesize* m_custom_tilesizes; int m_customtilesizesCount; int m_customtilesizesVertexArrayIndex; + // Color, depth and alpha GLfloat m_glDepth; union { diff --git a/MacGLide/OpenGLide/GLExtensions.cpp b/MacGLide/OpenGLide/GLExtensions.cpp index 888411b..0c00d1b 100644 --- a/MacGLide/OpenGLide/GLExtensions.cpp +++ b/MacGLide/OpenGLide/GLExtensions.cpp @@ -65,7 +65,7 @@ stExtensionSupport glNecessaryExt[] = { "GL_SGIS_texture_edge_clamp", 
OGL_EXT_DESIRED, &dummyExtVariable, &InternalConfig.EXT_SGIS_texture_edge_clamp }, { "GL_EXT_paletted_texture", OGL_EXT_DESIRED, &UserConfig.EXT_paletted_texture, &InternalConfig.EXT_paletted_texture }, { "GL_APPLE_packed_pixels", OGL_EXT_REQUIRED, &dummyExtVariable, &dummyExtVariable2 }, - { "GL_APPLE_client_storage", OGL_EXT_DESIRED, &UserConfig.APPLE_client_storage, &InternalConfig.APPLE_client_storage }, + { "GL_APPLE_client_storage", OGL_EXT_DESIRED, &UserConfig.APPLE_client_storage, &InternalConfig.APPLE_client_storage }, { "GL_EXT_compiled_vertex_array", OGL_EXT_DESIRED, &UserConfig.EXT_compiled_vertex_array,&InternalConfig.EXT_compiled_vertex_array }, { "GL_ARB_texture_rectangle", OGL_EXT_DESIRED, &UserConfig.ARB_texture_rectangle, &InternalConfig.ARB_texture_rectangle }, #ifdef OPENGLIDE_SYSTEM_HAS_FOGCOORD @@ -254,7 +254,7 @@ void GLExtensions(void) glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, true); glReportError(); } - + // Anisotropic filtering if (InternalConfig.EXT_texture_filter_anisotropic) { GLint MaxAnisotropyLevel;