3
0
mirror of https://github.com/hyprwm/Hyprland.git synced 2026-02-04 10:45:22 +00:00

renderer: optimise shader usage further, split shaders and add more caching (#12992)

* shader: split CM rgba/rgbx into discard ones

make it branchless if we have no discards.

* shader: ensure we dont stall on vbo uv buffer

if we render a new texture before the GPU has finished with the previous
one, the upload stalls until the GPU is done. call glBufferData to orphan
the buffer first; this allows the driver to return a new memory block
immediately if the GPU is still reading from the previous one.

* protocols: ensure we reset GL_PACK_ALIGNMENT

reset GL_PACK_ALIGNMENT back to the default initial value of 4

* shader: use unsigned short in VAO

lose a tiny bit of precision but gain massive bandwidth reductions.
use GL_UNSIGNED_SHORT and set it as normalized. clamp and round the UV
to fit uint16_t in customUv.

* shader: interleave vertex buffers

use std::array for fullverts, use a single interleaved buffer for
position and uv, should in theory improve cache locality. and also remove
the need to have two buffers around.

* shader: revert precision drop

we need the float precision because we might have 1.01 or similar
floats entering the CM shader maths, and rounding/clamping those means the
maths turns out wrong. so revert back to float, sadly with higher bandwidth
usage.

* update doColorManagement api

* convert primaries to XYZ on cpu

* remove unused primaries uniform

---------

Co-authored-by: UjinT34 <ujint34@mail.ru>
This commit is contained in:
Tom Englund
2026-01-17 15:31:19 +01:00
committed by GitHub
parent 92a3b91999
commit c99eb23869
13 changed files with 185 additions and 95 deletions

View File

@ -425,6 +425,7 @@ bool CScreencopyFrame::copyShm() {
}
}
glPixelStorei(GL_PACK_ALIGNMENT, 4);
g_pHyprOpenGL->m_renderData.pMonitor.reset();
glBindFramebuffer(GL_READ_FRAMEBUFFER, 0);

View File

@ -334,6 +334,7 @@ bool CToplevelExportFrame::copyShm(const Time::steady_tp& now) {
}
outFB.unbind();
glPixelStorei(GL_PACK_ALIGNMENT, 4);
glBindFramebuffer(GL_READ_FRAMEBUFFER, 0);
return true;

View File

@ -43,6 +43,7 @@
#include <fcntl.h>
#include <gbm.h>
#include <filesystem>
#include <cstring>
#include "./shaders/Shaders.hpp"
using namespace Hyprutils::OS;
@ -896,7 +897,9 @@ bool CHyprOpenGLImpl::initShaders() {
else {
// Color-management fragment shaders: each entry pairs an ePreparedFragmentShader id
// with the shader source file to load for it.
// RGBA and RGBX each have a plain variant and a *_DISCARD variant; every id must
// point at its own source file.
std::vector<SFragShaderDesc> CM_SHADERS = {{
    {SH_FRAG_CM_RGBA, "CMrgba.frag"},
    {SH_FRAG_CM_RGBA_DISCARD, "CMrgbadiscard.frag"},
    {SH_FRAG_CM_RGBX, "CMrgbx.frag"},
    // was "CMrgbadiscard.frag": the RGBX discard slot must load the RGBX discard shader
    {SH_FRAG_CM_RGBX_DISCARD, "CMrgbxdiscard.frag"},
    {SH_FRAG_CM_BLURPREPARE, "CMblurprepare.frag"},
    {SH_FRAG_CM_BORDER1, "CMborder.frag"},
}};
@ -1229,12 +1232,13 @@ void CHyprOpenGLImpl::passCMUniforms(WP<CShader> shader, const NColorManagement:
shader->setUniformInt(SHADER_TARGET_TF, targetImageDescription->value().transferFunction);
const auto targetPrimaries = targetImageDescription->getPrimaries();
const std::array<GLfloat, 8> glTargetPrimaries = {
targetPrimaries->value().red.x, targetPrimaries->value().red.y, targetPrimaries->value().green.x, targetPrimaries->value().green.y,
targetPrimaries->value().blue.x, targetPrimaries->value().blue.y, targetPrimaries->value().white.x, targetPrimaries->value().white.y,
const auto mat = targetPrimaries->value().toXYZ().mat();
const std::array<GLfloat, 9> glTargetPrimariesXYZ = {
mat[0][0], mat[1][0], mat[2][0], //
mat[0][1], mat[1][1], mat[2][1], //
mat[0][2], mat[1][2], mat[2][2], //
};
shader->setUniformMatrix4x2fv(SHADER_TARGET_PRIMARIES, 1, false, glTargetPrimaries);
shader->setUniformMatrix3fv(SHADER_TARGET_PRIMARIES_XYZ, 1, false, glTargetPrimariesXYZ);
const bool needsSDRmod = modifySDR && isSDR2HDR(imageDescription->value(), targetImageDescription->value());
const bool needsHDRmod = !needsSDRmod && isHDR2SDR(imageDescription->value(), targetImageDescription->value());
@ -1364,10 +1368,17 @@ void CHyprOpenGLImpl::renderTextureInternal(SP<CTexture> tex, const CBox& box, c
m_renderData.pMonitor->inFullscreenMode()) /* Fullscreen window with pass cm enabled */;
if (!skipCM && !usingFinalShader) {
// Pick the color-management shader for this texture type.
// When no discard is active the plain variants are used; otherwise the
// *_DISCARD variants, which carry the discardOpaque/discardAlpha checks.
if (!data.discardActive) {
    if (texType == TEXTURE_RGBA)
        shader = m_shaders->frag[SH_FRAG_CM_RGBA];
    else if (texType == TEXTURE_RGBX)
        shader = m_shaders->frag[SH_FRAG_CM_RGBX];
} else {
    if (texType == TEXTURE_RGBA)
        shader = m_shaders->frag[SH_FRAG_CM_RGBA_DISCARD];
    else if (texType == TEXTURE_RGBX)
        // RGBX needs its own discard variant (was SH_FRAG_CM_RGBA_DISCARD, which
        // samples the texture's alpha channel)
        shader = m_shaders->frag[SH_FRAG_CM_RGBX_DISCARD];
}
shader = useShader(shader);
@ -1487,20 +1498,33 @@ void CHyprOpenGLImpl::renderTextureInternal(SP<CTexture> tex, const CBox& box, c
}
glBindVertexArray(shader->getUniformLocation(SHADER_SHADER_VAO));
if (data.allowCustomUV && m_renderData.primarySurfaceUVTopLeft != Vector2D(-1, -1)) {
const float customUVs[] = {
m_renderData.primarySurfaceUVBottomRight.x, m_renderData.primarySurfaceUVTopLeft.y, m_renderData.primarySurfaceUVTopLeft.x,
m_renderData.primarySurfaceUVTopLeft.y, m_renderData.primarySurfaceUVBottomRight.x, m_renderData.primarySurfaceUVBottomRight.y,
m_renderData.primarySurfaceUVTopLeft.x, m_renderData.primarySurfaceUVBottomRight.y,
};
glBindBuffer(GL_ARRAY_BUFFER, shader->getUniformLocation(SHADER_SHADER_VBO));
glBindBuffer(GL_ARRAY_BUFFER, shader->getUniformLocation(SHADER_SHADER_VBO_UV));
glBufferSubData(GL_ARRAY_BUFFER, 0, sizeof(customUVs), customUVs);
} else {
glBindBuffer(GL_ARRAY_BUFFER, shader->getUniformLocation(SHADER_SHADER_VBO_UV));
glBufferSubData(GL_ARRAY_BUFFER, 0, sizeof(fullVerts), fullVerts);
// orphan the buffer: the GPU can keep reading the old block for previous draws while the CPU writes to a new one.
// this avoids stalls if renderTextureInternal is called multiple times in the same render pass,
// at the cost of some temporary VRAM usage.
glBufferData(GL_ARRAY_BUFFER, sizeof(fullVerts), nullptr, GL_DYNAMIC_DRAW);
auto verts = fullVerts;
if (data.allowCustomUV && m_renderData.primarySurfaceUVTopLeft != Vector2D(-1, -1)) {
const float u0 = m_renderData.primarySurfaceUVTopLeft.x;
const float v0 = m_renderData.primarySurfaceUVTopLeft.y;
const float u1 = m_renderData.primarySurfaceUVBottomRight.x;
const float v1 = m_renderData.primarySurfaceUVBottomRight.y;
verts[0].u = u0;
verts[0].v = v0;
verts[1].u = u0;
verts[1].v = v1;
verts[2].u = u1;
verts[2].v = v0;
verts[3].u = u1;
verts[3].v = v1;
}
glBufferSubData(GL_ARRAY_BUFFER, 0, sizeof(verts), verts.data());
if (!m_renderData.clipBox.empty() || !m_renderData.clipRegion.empty()) {
CRegion damageClip = m_renderData.clipBox;

View File

@ -35,12 +35,18 @@
struct gbm_device;
class CHyprRenderer;
inline const float fullVerts[] = {
1, 0, // top right
0, 0, // top left
1, 1, // bottom right
0, 1, // bottom left
// One vertex of an interleaved vertex buffer: a 2D position followed by its
// texture coordinate. The member order fixes the memory layout (x, y, u, v).
struct SVertex {
    // position
    float x;
    float y;
    // uv
    float u;
    float v;
};
// Unit quad with interleaved position and uv, both spanning [0, 1].
// Vertex order: top-left, bottom-left, top-right, bottom-right.
// `inline` gives this header-scope constant a single definition across
// translation units, consistent with fanVertsFull.
inline constexpr std::array<SVertex, 4> fullVerts = {{
    {0.0f, 0.0f, 0.0f, 0.0f}, // top-left
    {0.0f, 1.0f, 0.0f, 1.0f}, // bottom-left
    {1.0f, 0.0f, 1.0f, 0.0f}, // top-right
    {1.0f, 1.0f, 1.0f, 1.0f}, // bottom-right
}};
// Eight floats covering the corners of the [-1, 1] square, presumably as
// (x, y) pairs in the order (-1,-1), (1,-1), (1,1), (-1,1).
inline const float fanVertsFull[] = {
    -1.0f, -1.0f, //
    1.0f,  -1.0f, //
    1.0f,  1.0f,  //
    -1.0f, 1.0f,  //
};
enum eDiscardMode : uint8_t {
@ -98,7 +104,9 @@ enum ePreparedFragmentShader : uint8_t {
SH_FRAG_BORDER1,
SH_FRAG_GLITCH,
SH_FRAG_CM_RGBA,
SH_FRAG_CM_RGBA_DISCARD,
SH_FRAG_CM_RGBX,
SH_FRAG_CM_RGBX_DISCARD,
SH_FRAG_LAST,
};

View File

@ -132,7 +132,7 @@ void CShader::getUniformLocations() {
m_uniformLocations[SHADER_TARGET_TF] = getUniform("targetTF");
m_uniformLocations[SHADER_SRC_TF_RANGE] = getUniform("srcTFRange");
m_uniformLocations[SHADER_DST_TF_RANGE] = getUniform("dstTFRange");
m_uniformLocations[SHADER_TARGET_PRIMARIES] = getUniform("targetPrimaries");
m_uniformLocations[SHADER_TARGET_PRIMARIES_XYZ] = getUniform("targetPrimariesXYZ");
m_uniformLocations[SHADER_MAX_LUMINANCE] = getUniform("maxLuminance");
m_uniformLocations[SHADER_SRC_REF_LUMINANCE] = getUniform("srcRefLuminance");
m_uniformLocations[SHADER_DST_MAX_LUMINANCE] = getUniform("dstMaxLuminance");
@ -208,7 +208,7 @@ void CShader::getUniformLocations() {
}
void CShader::createVao() {
GLuint shaderVao = 0, shaderVbo = 0, shaderVboUv = 0;
GLuint shaderVao = 0, shaderVbo = 0;
glGenVertexArrays(1, &shaderVao);
glBindVertexArray(shaderVao);
@ -216,30 +216,26 @@ void CShader::createVao() {
if (m_uniformLocations[SHADER_POS_ATTRIB] != -1) {
glGenBuffers(1, &shaderVbo);
glBindBuffer(GL_ARRAY_BUFFER, shaderVbo);
glBufferData(GL_ARRAY_BUFFER, sizeof(fullVerts), fullVerts, GL_STATIC_DRAW);
glBufferData(GL_ARRAY_BUFFER, sizeof(fullVerts), fullVerts.data(), GL_DYNAMIC_DRAW);
glEnableVertexAttribArray(m_uniformLocations[SHADER_POS_ATTRIB]);
glVertexAttribPointer(m_uniformLocations[SHADER_POS_ATTRIB], 2, GL_FLOAT, GL_FALSE, 0, nullptr);
glVertexAttribPointer(m_uniformLocations[SHADER_POS_ATTRIB], 2, GL_FLOAT, GL_FALSE, sizeof(SVertex), (void*)offsetof(SVertex, x));
}
// UV VBO (dynamic, may be updated per frame)
if (m_uniformLocations[SHADER_TEX_ATTRIB] != -1) {
glGenBuffers(1, &shaderVboUv);
glBindBuffer(GL_ARRAY_BUFFER, shaderVboUv);
glBufferData(GL_ARRAY_BUFFER, sizeof(fullVerts), fullVerts, GL_DYNAMIC_DRAW); // Initial dummy UVs
if (m_uniformLocations[SHADER_TEX_ATTRIB] != -1 && shaderVbo != 0) {
glBindBuffer(GL_ARRAY_BUFFER, shaderVbo);
glEnableVertexAttribArray(m_uniformLocations[SHADER_TEX_ATTRIB]);
glVertexAttribPointer(m_uniformLocations[SHADER_TEX_ATTRIB], 2, GL_FLOAT, GL_FALSE, 0, nullptr);
glVertexAttribPointer(m_uniformLocations[SHADER_TEX_ATTRIB], 2, GL_FLOAT, GL_FALSE, sizeof(SVertex), (void*)offsetof(SVertex, u));
}
glBindVertexArray(0);
glBindBuffer(GL_ARRAY_BUFFER, 0);
m_uniformLocations[SHADER_SHADER_VAO] = shaderVao;
m_uniformLocations[SHADER_SHADER_VBO_POS] = shaderVbo;
m_uniformLocations[SHADER_SHADER_VBO_UV] = shaderVboUv;
m_uniformLocations[SHADER_SHADER_VBO] = shaderVbo;
RASSERT(m_uniformLocations[SHADER_SHADER_VAO] >= 0, "SHADER_SHADER_VAO could not be created");
RASSERT(m_uniformLocations[SHADER_SHADER_VBO_POS] >= 0, "SHADER_SHADER_VBO_POS could not be created");
RASSERT(m_uniformLocations[SHADER_SHADER_VBO_UV] >= 0, "SHADER_SHADER_VBO_UV could not be created");
// the message must name the slot actually being checked (SHADER_SHADER_VBO, not the removed SHADER_SHADER_VBO_POS)
RASSERT(m_uniformLocations[SHADER_SHADER_VBO] >= 0, "SHADER_SHADER_VBO could not be created");
}
void CShader::setUniformInt(eShaderUniform location, GLint v0) {
@ -390,11 +386,10 @@ void CShader::destroy() {
if (m_program == 0)
return;
GLuint shaderVao, shaderVbo, shaderVboUv;
GLuint shaderVao, shaderVbo;
shaderVao = m_uniformLocations[SHADER_SHADER_VAO] == -1 ? 0 : m_uniformLocations[SHADER_SHADER_VAO];
shaderVbo = m_uniformLocations[SHADER_SHADER_VBO_POS] == -1 ? 0 : m_uniformLocations[SHADER_SHADER_VBO_POS];
shaderVboUv = m_uniformLocations[SHADER_SHADER_VBO_UV] == -1 ? 0 : m_uniformLocations[SHADER_SHADER_VBO_UV];
shaderVbo = m_uniformLocations[SHADER_SHADER_VBO] == -1 ? 0 : m_uniformLocations[SHADER_SHADER_VBO];
if (shaderVao)
glDeleteVertexArrays(1, &shaderVao);
@ -402,9 +397,6 @@ void CShader::destroy() {
if (shaderVbo)
glDeleteBuffers(1, &shaderVbo);
if (shaderVboUv)
glDeleteBuffers(1, &shaderVboUv);
glDeleteProgram(m_program);
m_program = 0;
}

View File

@ -14,7 +14,7 @@ enum eShaderUniform : uint8_t {
SHADER_TARGET_TF,
SHADER_SRC_TF_RANGE,
SHADER_DST_TF_RANGE,
SHADER_TARGET_PRIMARIES,
SHADER_TARGET_PRIMARIES_XYZ,
SHADER_MAX_LUMINANCE,
SHADER_SRC_REF_LUMINANCE,
SHADER_DST_MAX_LUMINANCE,
@ -31,8 +31,7 @@ enum eShaderUniform : uint8_t {
SHADER_DISCARD_ALPHA,
SHADER_DISCARD_ALPHA_VALUE,
SHADER_SHADER_VAO,
SHADER_SHADER_VBO_POS,
SHADER_SHADER_VBO_UV,
SHADER_SHADER_VBO,
SHADER_TOP_LEFT,
SHADER_BOTTOM_RIGHT,
SHADER_FULL_SIZE,

View File

@ -401,13 +401,12 @@ vec4 tonemap(vec4 color, mat3 dstXYZ) {
return vec4(fromLMS * toLinear(vec4(ICtCpPQInv * ICtCp, 1.0), CM_TRANSFER_FUNCTION_ST2084_PQ).rgb * HDR_MAX_LUMINANCE * refScale, color[3]);
}
vec4 doColorManagement(vec4 pixColor, int srcTF, int dstTF, mat4x2 dstPrimaries) {
vec4 doColorManagement(vec4 pixColor, int srcTF, int dstTF, mat3 dstxyz) {
pixColor.rgb /= max(pixColor.a, 0.001);
pixColor.rgb = toLinearRGB(pixColor.rgb, srcTF);
pixColor.rgb = convertMatrix * pixColor.rgb;
pixColor = toNit(pixColor, srcTFRange);
pixColor.rgb *= pixColor.a;
mat3 dstxyz = primaries2xyz(dstPrimaries);
pixColor = tonemap(pixColor, dstxyz);
pixColor = fromLinearNit(pixColor, dstTF, dstTFRange);
if ((srcTF == CM_TRANSFER_FUNCTION_SRGB || srcTF == CM_TRANSFER_FUNCTION_GAMMA22) && dstTF == CM_TRANSFER_FUNCTION_ST2084_PQ) {

View File

@ -6,7 +6,7 @@ in vec2 v_texcoord;
uniform int sourceTF; // eTransferFunction
uniform int targetTF; // eTransferFunction
uniform mat4x2 targetPrimaries;
uniform mat3 targetPrimariesXYZ;
uniform vec2 fullSizeUntransformed;
uniform float radiusOuter;
@ -90,7 +90,7 @@ void main() {
pixColor = getColorForCoord(v_texcoord);
pixColor.rgb *= pixColor[3];
pixColor = doColorManagement(pixColor, sourceTF, targetTF, targetPrimaries);
pixColor = doColorManagement(pixColor, sourceTF, targetTF, targetPrimariesXYZ);
pixColor *= alpha * additionalAlpha;

View File

@ -7,14 +7,9 @@ uniform sampler2D tex;
uniform int sourceTF; // eTransferFunction
uniform int targetTF; // eTransferFunction
uniform mat4x2 targetPrimaries;
uniform mat3 targetPrimariesXYZ;
uniform float alpha;
uniform bool discardOpaque;
uniform bool discardAlpha;
uniform float discardAlphaValue;
uniform bool applyTint;
uniform vec3 tint;
@ -25,14 +20,8 @@ layout(location = 0) out vec4 fragColor;
void main() {
vec4 pixColor = texture(tex, v_texcoord);
if (discardOpaque && pixColor.a * alpha == 1.0)
discard;
if (discardAlpha && pixColor.a <= discardAlphaValue)
discard;
// this shader shouldn't be used when skipCM == 1
pixColor = doColorManagement(pixColor, sourceTF, targetTF, targetPrimaries);
pixColor = doColorManagement(pixColor, sourceTF, targetTF, targetPrimariesXYZ);
if (applyTint)
pixColor.rgb *= tint;

View File

@ -0,0 +1,44 @@
#version 300 es
#extension GL_ARB_shading_language_include : enable
// Color-managed texture fragment shader with optional fragment discarding.
// Samples the full RGBA value of `tex`.
precision highp float;
// texture coordinate used to sample `tex`
in vec2 v_texcoord;
uniform sampler2D tex;
// transfer functions and target primaries matrix, forwarded to doColorManagement()
uniform int sourceTF; // eTransferFunction
uniform int targetTF; // eTransferFunction
uniform mat3 targetPrimariesXYZ;
// multiplier applied to the final color; also part of the discardOpaque test
uniform float alpha;
// when set, fragments with pixColor.a * alpha == 1.0 are discarded
uniform bool discardOpaque;
// when set, fragments with pixColor.a <= discardAlphaValue are discarded
uniform bool discardAlpha;
uniform float discardAlphaValue;
// when applyTint is set, the color's rgb is multiplied by tint
uniform bool applyTint;
uniform vec3 tint;
// NOTE(review): `radius` and rounding() used in main() are presumably declared by rounding.glsl — confirm
#include "rounding.glsl"
// NOTE(review): doColorManagement() is presumably provided by CM.glsl — confirm
#include "CM.glsl"
layout(location = 0) out vec4 fragColor;
// Samples the texture, optionally discards the fragment, then applies color
// management, tint and rounding before writing the alpha-scaled result.
void main() {
    vec4 pixColor = texture(tex, v_texcoord);

    // discard checks run on the raw sample, before any color conversion
    bool dropOpaque = discardOpaque && pixColor.a * alpha == 1.0;
    bool dropAlpha  = discardAlpha && pixColor.a <= discardAlphaValue;
    if (dropOpaque || dropAlpha)
        discard;

    // this shader shouldn't be used when skipCM == 1
    pixColor = doColorManagement(pixColor, sourceTF, targetTF, targetPrimariesXYZ);

    if (applyTint)
        pixColor.rgb *= tint;

    if (radius > 0.0)
        pixColor = rounding(pixColor);

    fragColor = pixColor * alpha;
}

View File

@ -7,14 +7,9 @@ uniform sampler2D tex;
uniform int sourceTF; // eTransferFunction
uniform int targetTF; // eTransferFunction
uniform mat4x2 targetPrimaries;
uniform mat3 targetPrimariesXYZ;
uniform float alpha;
uniform bool discardOpaque;
uniform bool discardAlpha;
uniform float discardAlphaValue;
uniform bool applyTint;
uniform vec3 tint;
@ -25,14 +20,8 @@ layout(location = 0) out vec4 fragColor;
void main() {
vec4 pixColor = vec4(texture(tex, v_texcoord).rgb, 1.0);
if (discardOpaque && pixColor.a * alpha == 1.0)
discard;
if (discardAlpha && pixColor.a <= discardAlphaValue)
discard;
// this shader shouldn't be used when skipCM == 1
pixColor = doColorManagement(pixColor, sourceTF, targetTF, targetPrimaries);
pixColor = doColorManagement(pixColor, sourceTF, targetTF, targetPrimariesXYZ);
if (applyTint)
pixColor.rgb *= tint;

View File

@ -0,0 +1,44 @@
#version 300 es
#extension GL_ARB_shading_language_include : enable
// Color-managed texture fragment shader with optional fragment discarding.
// Only the rgb channels of `tex` are sampled; alpha is forced to 1.0 in main().
precision highp float;
// texture coordinate used to sample `tex`
in vec2 v_texcoord;
uniform sampler2D tex;
// transfer functions and target primaries matrix, forwarded to doColorManagement()
uniform int sourceTF; // eTransferFunction
uniform int targetTF; // eTransferFunction
uniform mat3 targetPrimariesXYZ;
// multiplier applied to the final color; also part of the discardOpaque test
uniform float alpha;
// when set, fragments with pixColor.a * alpha == 1.0 are discarded
uniform bool discardOpaque;
// when set, fragments with pixColor.a <= discardAlphaValue are discarded
uniform bool discardAlpha;
uniform float discardAlphaValue;
// when applyTint is set, the color's rgb is multiplied by tint
uniform bool applyTint;
uniform vec3 tint;
// NOTE(review): `radius` and rounding() used in main() are presumably declared by rounding.glsl — confirm
#include "rounding.glsl"
// NOTE(review): doColorManagement() is presumably provided by CM.glsl — confirm
#include "CM.glsl"
layout(location = 0) out vec4 fragColor;
// Samples the texture's rgb with alpha forced to 1.0, optionally discards the
// fragment, then applies color management, tint and rounding before writing
// the alpha-scaled result.
void main() {
    vec4 pixColor = vec4(texture(tex, v_texcoord).rgb, 1.0);

    // discard checks run on the raw sample, before any color conversion
    bool dropOpaque = discardOpaque && pixColor.a * alpha == 1.0;
    bool dropAlpha  = discardAlpha && pixColor.a <= discardAlphaValue;
    if (dropOpaque || dropAlpha)
        discard;

    // this shader shouldn't be used when skipCM == 1
    pixColor = doColorManagement(pixColor, sourceTF, targetTF, targetPrimariesXYZ);

    if (applyTint)
        pixColor.rgb *= tint;

    if (radius > 0.0)
        pixColor = rounding(pixColor);

    fragColor = pixColor * alpha;
}

View File

@ -8,7 +8,7 @@ in vec2 v_texcoord;
uniform int skipCM;
uniform int sourceTF; // eTransferFunction
uniform int targetTF; // eTransferFunction
uniform mat4x2 targetPrimaries;
uniform mat3 targetPrimariesXYZ;
uniform vec2 topLeft;
uniform vec2 bottomRight;
@ -93,7 +93,7 @@ void main() {
pixColor.rgb *= pixColor[3];
if (skipCM == 0)
pixColor = doColorManagement(pixColor, sourceTF, targetTF, targetPrimaries);
pixColor = doColorManagement(pixColor, sourceTF, targetTF, targetPrimariesXYZ);
fragColor = pixColor;
}