From 2e34aa8a1de8c0f8c0786b050fad6e1bdea63e22 Mon Sep 17 00:00:00 2001 From: Ingo Weyrich Date: Tue, 10 Sep 2019 23:23:37 +0200 Subject: [PATCH 1/3] Soft Light: speedup and reduce memory usage --- rtengine/ipsoftlight.cc | 115 +++++++++++++++++++++++++++++++--------- 1 file changed, 90 insertions(+), 25 deletions(-) diff --git a/rtengine/ipsoftlight.cc b/rtengine/ipsoftlight.cc index c7a4d1af7..1d94a29c2 100644 --- a/rtengine/ipsoftlight.cc +++ b/rtengine/ipsoftlight.cc @@ -3,6 +3,7 @@ * This file is part of RawTherapee. * * Copyright 2018 Alberto Griggio + * Optimized 2019 Ingo Weyrich * * RawTherapee is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -18,14 +19,10 @@ * along with RawTherapee. If not, see . */ -#ifdef _OPENMP -#include -#endif - #include "improcfun.h" - #include "procparams.h" - +#define BENCHMARK +#include "StopWatch.h" namespace rtengine { namespace { @@ -33,18 +30,29 @@ namespace { inline float sl(float blend, float x) { if (!OOG(x)) { - const float orig = 1.f - blend; float v = Color::gamma_srgb(x) / MAXVALF; - // Pegtop's formula from + // using Pegtop's formula from // https://en.wikipedia.org/wiki/Blend_modes#Soft_Light - float v2 = v * v; - float v22 = v2 * 2.f; - v = v2 + v22 - v22 * v; - x = blend * Color::igamma_srgb(v * MAXVALF) + orig * x; + // const float orig = 1.f - blend; + // float v2 = v * v; + // float v22 = v2 * 2.f; + // v = v2 + v22 - v22 * v; + // return blend * Color::igamma_srgb(v * MAXVALF) + orig * x; + + // using optimized formula (heckflosse67@gmx.de) + return intp(blend, Color::igamma_srgb(v * v * (3.f - 2.f * v) * MAXVALF), x); } return x; } +#ifdef __SSE2__ +inline vfloat sl(vfloat blend, vfloat x) +{ + const vfloat v = Color::gammatab_srgb[x] / F2V(MAXVALF); + return vself(vmaskf_gt(x, F2V(MAXVALF)), x, vself(vmaskf_lt(x, ZEROV), x, vintpf(blend, Color::igammatab_srgb[v * v * (F2V(3.f) - (v + v)) * MAXVALF], x))); +} +#endif + } // 
namespace @@ -53,24 +61,81 @@ void ImProcFunctions::softLight(LabImage *lab) if (!params->softlight.enabled || !params->softlight.strength) { return; } + BENCHFUN - Imagefloat working(lab->W, lab->H); - lab2rgb(*lab, working, params->icm.workingProfile); + TMatrix wprof = ICCStore::getInstance()->workingSpaceMatrix(params->icm.workingProfile); + const float wp[3][3] = { + {static_cast<float> (wprof[0][0]), static_cast<float> (wprof[0][1]), static_cast<float> (wprof[0][2])}, + {static_cast<float> (wprof[1][0]), static_cast<float> (wprof[1][1]), static_cast<float> (wprof[1][2])}, + {static_cast<float> (wprof[2][0]), static_cast<float> (wprof[2][1]), static_cast<float> (wprof[2][2])} + }; - const float blend = params->softlight.strength / 100.f; + TMatrix wiprof = ICCStore::getInstance()->workingSpaceInverseMatrix(params->icm.workingProfile); + const float wip[3][3] = { + {static_cast<float> (wiprof[0][0]), static_cast<float> (wiprof[0][1]), static_cast<float> (wiprof[0][2])}, + {static_cast<float> (wiprof[1][0]), static_cast<float> (wiprof[1][1]), static_cast<float> (wiprof[1][2])}, + {static_cast<float> (wiprof[2][0]), static_cast<float> (wiprof[2][1]), static_cast<float> (wiprof[2][2])} + }; -#ifdef _OPENMP - #pragma omp parallel for -#endif - for (int y = 0; y < working.getHeight(); ++y) { - for (int x = 0; x < working.getWidth(); ++x) { - working.r(y, x) = sl(blend, working.r(y, x)); - working.g(y, x) = sl(blend, working.g(y, x)); - working.b(y, x) = sl(blend, working.b(y, x)); +#ifdef __SSE2__ + vfloat wipv[3][3]; + + for (int i = 0; i < 3; i++) { + for (int j = 0; j < 3; j++) { + wipv[i][j] = F2V(wiprof[i][j]); + } + } + + vfloat wpv[3][3]; + + for (int i = 0; i < 3; i++) { + for (int j = 0; j < 3; j++) { + wpv[i][j] = F2V(wprof[i][j]); + } + } +#endif + +#ifdef _OPENMP + #pragma omp parallel +#endif + { + const float blend = params->softlight.strength / 100.f; +#ifdef __SSE2__ + const vfloat blendv = F2V(blend); +#endif +#ifdef _OPENMP + #pragma omp for schedule(dynamic,16) +#endif + for (int i = 0; i < lab->H; ++i) { + int j = 0; +#ifdef __SSE2__ + for (; j < lab->W - 3; j += 4) { +
vfloat Xv, Yv, Zv; + vfloat Rv, Gv, Bv; + Color::Lab2XYZ(LVFU(lab->L[i][j]),LVFU (lab->a[i][j]),LVFU (lab->b[i][j]), Xv, Yv, Zv); + Color::xyz2rgb(Xv, Yv, Zv, Rv, Gv, Bv, wipv); + Rv = sl(blendv, Rv); + Gv = sl(blendv, Gv); + Bv = sl(blendv, Bv); + Color::rgbxyz(Rv, Gv, Bv, Xv, Yv, Zv, wpv); + for (int k = 0; k < 4; ++k) { + Color::XYZ2Lab(Xv[k], Yv[k], Zv[k], lab->L[i][j + k], lab->a[i][j + k], lab->b[i][j+ k]); + } + } +#endif + for (; j < lab->W; j++) { + float X, Y, Z; + float R, G, B; + Color::Lab2XYZ(lab->L[i][j], lab->a[i][j], lab->b[i][j], X, Y, Z); + Color::xyz2rgb(X, Y, Z, R, G, B, wip); + R = sl(blend, R); + G = sl(blend, G); + B = sl(blend, B); + Color::rgbxyz(R, G, B, X, Y, Z, wp); + Color::XYZ2Lab(X, Y, Z, lab->L[i][j], lab->a[i][j], lab->b[i][j]); + } } } - - rgb2lab(working, *lab, params->icm.workingProfile); } } // namespace rtengine From 1e75f38dba6b95d380f1d97fb9ec61748a139ff1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fl=C3=B6ssie?= Date: Thu, 12 Sep 2019 14:49:51 +0200 Subject: [PATCH 2/3] Softlight cleanups - More `const` - Removed `using namespace` - Whitespace cleanups --- rtengine/ipsoftlight.cc | 57 ++++++++++++++++++----------------------- 1 file changed, 25 insertions(+), 32 deletions(-) diff --git a/rtengine/ipsoftlight.cc b/rtengine/ipsoftlight.cc index 1d94a29c2..556790eb4 100644 --- a/rtengine/ipsoftlight.cc +++ b/rtengine/ipsoftlight.cc @@ -20,17 +20,17 @@ */ #include "improcfun.h" + #include "procparams.h" #define BENCHMARK #include "StopWatch.h" -namespace rtengine { namespace { inline float sl(float blend, float x) { - if (!OOG(x)) { - float v = Color::gamma_srgb(x) / MAXVALF; + if (!rtengine::OOG(x)) { + float v = rtengine::Color::gamma_srgb(x) / rtengine::MAXVALF; // using Pegtop's formula from // https://en.wikipedia.org/wiki/Blend_modes#Soft_Light // const float orig = 1.f - blend; @@ -40,7 +40,7 @@ inline float sl(float blend, float x) // return blend * Color::igamma_srgb(v * MAXVALF) + orig * x; // using optimized formula 
(heckflosse67@gmx.de) - return intp(blend, Color::igamma_srgb(v * v * (3.f - 2.f * v) * MAXVALF), x); + return rtengine::intp(blend, rtengine::Color::igamma_srgb(v * v * (3.f - 2.f * v) * rtengine::MAXVALF), x); } return x; } @@ -48,51 +48,46 @@ inline float sl(float blend, float x) #ifdef __SSE2__ inline vfloat sl(vfloat blend, vfloat x) { - const vfloat v = Color::gammatab_srgb[x] / F2V(MAXVALF); - return vself(vmaskf_gt(x, F2V(MAXVALF)), x, vself(vmaskf_lt(x, ZEROV), x, vintpf(blend, Color::igammatab_srgb[v * v * (F2V(3.f) - (v + v)) * MAXVALF], x))); + const vfloat v = rtengine::Color::gammatab_srgb[x] / F2V(rtengine::MAXVALF); + return vself(vmaskf_gt(x, F2V(rtengine::MAXVALF)), x, vself(vmaskf_lt(x, ZEROV), x, vintpf(blend, rtengine::Color::igammatab_srgb[v * v * (F2V(3.f) - (v + v)) * rtengine::MAXVALF], x))); } #endif } // namespace - -void ImProcFunctions::softLight(LabImage *lab) +void rtengine::ImProcFunctions::softLight(LabImage *lab) { if (!params->softlight.enabled || !params->softlight.strength) { return; } BENCHFUN - TMatrix wprof = ICCStore::getInstance()->workingSpaceMatrix(params->icm.workingProfile); + const TMatrix wprof = ICCStore::getInstance()->workingSpaceMatrix(params->icm.workingProfile); const float wp[3][3] = { - {static_cast<float> (wprof[0][0]), static_cast<float> (wprof[0][1]), static_cast<float> (wprof[0][2])}, - {static_cast<float> (wprof[1][0]), static_cast<float> (wprof[1][1]), static_cast<float> (wprof[1][2])}, - {static_cast<float> (wprof[2][0]), static_cast<float> (wprof[2][1]), static_cast<float> (wprof[2][2])} + {static_cast<float>(wprof[0][0]), static_cast<float>(wprof[0][1]), static_cast<float>(wprof[0][2])}, + {static_cast<float>(wprof[1][0]), static_cast<float>(wprof[1][1]), static_cast<float>(wprof[1][2])}, + {static_cast<float>(wprof[2][0]), static_cast<float>(wprof[2][1]), static_cast<float>(wprof[2][2])} }; - TMatrix wiprof = ICCStore::getInstance()->workingSpaceInverseMatrix(params->icm.workingProfile); + const TMatrix wiprof = ICCStore::getInstance()->workingSpaceInverseMatrix(params->icm.workingProfile); const float wip[3][3] = { -
{static_cast<float> (wiprof[0][0]), static_cast<float> (wiprof[0][1]), static_cast<float> (wiprof[0][2])}, - {static_cast<float> (wiprof[1][0]), static_cast<float> (wiprof[1][1]), static_cast<float> (wiprof[1][2])}, - {static_cast<float> (wiprof[2][0]), static_cast<float> (wiprof[2][1]), static_cast<float> (wiprof[2][2])} + {static_cast<float>(wiprof[0][0]), static_cast<float>(wiprof[0][1]), static_cast<float>(wiprof[0][2])}, + {static_cast<float>(wiprof[1][0]), static_cast<float>(wiprof[1][1]), static_cast<float>(wiprof[1][2])}, + {static_cast<float>(wiprof[2][0]), static_cast<float>(wiprof[2][1]), static_cast<float>(wiprof[2][2])} }; #ifdef __SSE2__ - vfloat wipv[3][3]; + const vfloat wpv[3][3] = { + {F2V(wprof[0][0]), F2V(wprof[0][1]), F2V(wprof[0][2])}, + {F2V(wprof[1][0]), F2V(wprof[1][1]), F2V(wprof[1][2])}, + {F2V(wprof[2][0]), F2V(wprof[2][1]), F2V(wprof[2][2])} + }; - for (int i = 0; i < 3; i++) { - for (int j = 0; j < 3; j++) { - wipv[i][j] = F2V(wiprof[i][j]); - } - } - - vfloat wpv[3][3]; - - for (int i = 0; i < 3; i++) { - for (int j = 0; j < 3; j++) { - wpv[i][j] = F2V(wprof[i][j]); - } - } + const vfloat wipv[3][3] = { + {F2V(wiprof[0][0]), F2V(wiprof[0][1]), F2V(wiprof[0][2])}, + {F2V(wiprof[1][0]), F2V(wiprof[1][1]), F2V(wiprof[1][2])}, + {F2V(wiprof[2][0]), F2V(wiprof[2][1]), F2V(wiprof[2][2])} + }; #endif #ifdef _OPENMP @@ -137,5 +132,3 @@ void ImProcFunctions::softLight(LabImage *lab) } } } - -} // namespace rtengine From f55afb91c306c3449f579bff0f1f6cfdcde8c831 Mon Sep 17 00:00:00 2001 From: Ingo Weyrich Date: Sun, 15 Sep 2019 22:30:03 +0200 Subject: [PATCH 3/3] Soft Light: remove benchmark code, closes #5447 --- rtengine/ipsoftlight.cc | 3 --- 1 file changed, 3 deletions(-) diff --git a/rtengine/ipsoftlight.cc b/rtengine/ipsoftlight.cc index 556790eb4..cd49e858f 100644 --- a/rtengine/ipsoftlight.cc +++ b/rtengine/ipsoftlight.cc @@ -22,8 +22,6 @@ #include "improcfun.h" #include "procparams.h" -#define BENCHMARK -#include "StopWatch.h" namespace { @@ -60,7 +58,6 @@ void rtengine::ImProcFunctions::softLight(LabImage *lab) if (!params->softlight.enabled ||
!params->softlight.strength) { return; } - BENCHFUN const TMatrix wprof = ICCStore::getInstance()->workingSpaceMatrix(params->icm.workingProfile); const float wp[3][3] = {