Skip to content

Commit b32d4f9

Browse files
author
borisrp
committed
Small optim for kuwahara when 1080p
1 parent f6d1e44 commit b32d4f9

1 file changed

Lines changed: 65 additions & 33 deletions

File tree

hrt/prefab/rfx/KuwaharaFilter.hx

Lines changed: 65 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,21 @@ class KuwaharaShader extends hrt.shader.PbrShader {
1212

1313
@param var texture : Sampler2D;
1414

15+
@const var FETCH_OPTIM_2x2 : Bool;
16+
17+
// Four 5x5 region masks interleaved per texel (row-major): components x, y, z, w are the weights of that texel for regions m0, m1, m2, m3
18+
final mask : Array<Vec4, 25> = [
19+
vec4(1.0,0.0,0.0,0.0), vec4(1.0,0.0,0.0,0.0), vec4(1.0,1.0,0.0,0.0), vec4(0.0,1.0,0.0,0.0), vec4(0.0,1.0,0.0,0.0),
20+
vec4(1.0,0.0,0.0,0.0), vec4(1.0,0.0,0.0,0.0), vec4(1.0,1.0,0.0,0.0), vec4(0.0,1.0,0.0,0.0), vec4(0.0,1.0,0.0,0.0),
21+
vec4(1.0,0.0,0.0,1.0), vec4(1.0,0.0,0.0,1.0), vec4(1.0,1.0,1.0,1.0), vec4(0.0,1.0,1.0,0.0), vec4(0.0,1.0,1.0,0.0),
22+
vec4(0.0,0.0,0.0,1.0), vec4(0.0,0.0,0.0,1.0), vec4(0.0,0.0,1.0,1.0), vec4(0.0,0.0,1.0,0.0), vec4(0.0,0.0,1.0,0.0),
23+
vec4(0.0,0.0,0.0,1.0), vec4(0.0,0.0,0.0,1.0), vec4(0.0,0.0,1.0,1.0), vec4(0.0,0.0,1.0,0.0), vec4(0.0,0.0,1.0,0.0)
24+
];
25+
1526
function fragment() {
1627
var size = texture.size();
1728
var invSize = 1.0 / size;
18-
var n = float((scaledRadius + 1) * (scaledRadius + 1));
29+
var invN = 1.0 /float((scaledRadius + 1) * (scaledRadius + 1));
1930
var m0 = vec3(0.0);
2031
var m1 = vec3(0.0);
2132
var m2 = vec3(0.0);
@@ -26,42 +37,60 @@ class KuwaharaShader extends hrt.shader.PbrShader {
2637
var s2 = vec3(0.0);
2738
var s3 = vec3(0.0);
2839

29-
for ( j in -scaledRadius...1 ) {
30-
for ( i in -scaledRadius...1 ) {
31-
var c = texture.get(calculatedUV + vec2(i,j) * invSize).rgb;
32-
m0 += c;
33-
s0 += c * c;
40+
if(FETCH_OPTIM_2x2){
41+
var cursor = 0;
42+
@unroll for ( j in -scaledRadius...scaledRadius+1) {
43+
@unroll for ( i in -scaledRadius...scaledRadius+1) {
44+
var c = texture.get(calculatedUV + vec2(i,j) * invSize).rgb;
45+
var cc = c * c;
46+
m0 += c * mask[cursor].x;
47+
s0 += cc * mask[cursor].x;
48+
m1 += c * mask[cursor].y;
49+
s1 += cc * mask[cursor].y;
50+
m2 += c * mask[cursor].z;
51+
s2 += cc * mask[cursor].z;
52+
m3 += c * mask[cursor].w;
53+
s3 += cc * mask[cursor].w;
54+
cursor++;
55+
}
56+
}
57+
} else {
58+
for ( j in -scaledRadius...1 ) {
59+
for ( i in -scaledRadius...1 ) {
60+
var c = texture.get(calculatedUV + vec2(i,j) * invSize).rgb;
61+
m0 += c;
62+
s0 += c * c;
63+
}
3464
}
35-
}
3665

37-
for ( j in -scaledRadius...1 ) {
38-
for ( i in 0...scaledRadius + 1) {
39-
var c = texture.get(calculatedUV + vec2(i,j) * invSize).rgb;
40-
m1 += c;
41-
s1 += c * c;
66+
for ( j in -scaledRadius...1 ) {
67+
for ( i in 0...scaledRadius + 1) {
68+
var c = texture.get(calculatedUV + vec2(i,j) * invSize).rgb;
69+
m1 += c;
70+
s1 += c * c;
71+
}
4272
}
43-
}
4473

45-
for ( j in 0...scaledRadius + 1 ) {
46-
for ( i in 0...scaledRadius + 1) {
47-
var c = texture.get(calculatedUV + vec2(i,j) * invSize).rgb;
48-
m2 += c;
49-
s2 += c * c;
74+
for ( j in 0...scaledRadius + 1 ) {
75+
for ( i in 0...scaledRadius + 1) {
76+
var c = texture.get(calculatedUV + vec2(i,j) * invSize).rgb;
77+
m2 += c;
78+
s2 += c * c;
79+
}
5080
}
51-
}
5281

53-
for ( j in 0... scaledRadius + 1 ) {
54-
for ( i in -scaledRadius...1 ) {
55-
var c = texture.get(calculatedUV + vec2(i,j) * invSize).rgb;
56-
m3 += c;
57-
s3 += c * c;
82+
for ( j in 0... scaledRadius + 1 ) {
83+
for ( i in -scaledRadius...1 ) {
84+
var c = texture.get(calculatedUV + vec2(i,j) * invSize).rgb;
85+
m3 += c;
86+
s3 += c * c;
87+
}
5888
}
5989
}
6090

61-
6291
var minSigma2 = 1e+10;
63-
m0 /= n;
64-
s0 = abs(s0 / n - m0 * m0);
92+
m0 *= invN;
93+
s0 = abs(s0 * invN - m0 * m0);
6594

6695
var filteredColor = vec3(0.0);
6796

@@ -71,26 +100,26 @@ class KuwaharaShader extends hrt.shader.PbrShader {
71100
filteredColor = vec3(m0);
72101
}
73102

74-
m1 /= n;
75-
s1 = abs(s1 / n - m1 * m1);
103+
m1 *= invN;
104+
s1 = abs(s1 * invN - m1 * m1);
76105

77106
sigma2 = s1.r + s1.g + s1.b;
78107
if (sigma2 < minSigma2) {
79108
minSigma2 = sigma2;
80109
filteredColor = vec3(m1);
81110
}
82111

83-
m2 /= n;
84-
s2 = abs(s2 / n - m2 * m2);
112+
m2 *= invN;
113+
s2 = abs(s2 * invN - m2 * m2);
85114

86115
sigma2 = s2.r + s2.g + s2.b;
87116
if (sigma2 < minSigma2) {
88117
minSigma2 = sigma2;
89118
filteredColor = vec3(m2);
90119
}
91120

92-
m3 /= n;
93-
s3 = abs(s3 / n - m3 * m3);
121+
m3 *= invN;
122+
s3 = abs(s3 * invN - m3 * m3);
94123

95124
sigma2 = s3.r + s3.g + s3.b;
96125
if (sigma2 < minSigma2) {
@@ -126,17 +155,20 @@ class KuwaharaFilter extends RendererFX {
126155

127156
function execute(r : h3d.scene.Renderer) {
128157
r.mark("Kuwahara");
158+
r.ctx.engine.driver.beginEvent("Kuwahara");
129159

130160
var input = getInput(r);
131161
pass.shader.texture = input;
132162

133163
pass.shader.scaledRadius = Std.int(radius * hxd.Math.max(input.width / 1920, input.height / 1080));
164+
pass.shader.FETCH_OPTIM_2x2 = pass.shader.scaledRadius == 2; // masked single-pass path: the 25-entry mask table only covers a 5x5 window (radius 2)
134165
pass.shader.startOpacity = startOpacity;
135166
pass.shader.endOpacity = endOpacity;
136167
pass.shader.startDist = startDist;
137168
pass.shader.endDist = endDist;
138169
pass.pass.setBlendMode(Alpha);
139170
pass.render();
171+
r.ctx.engine.driver.endEvent();
140172
}
141173

142174
override function begin(r:h3d.scene.Renderer, step:h3d.impl.RendererFX.Step) {

0 commit comments

Comments
 (0)