Filter 3X3

I thought I would post the vertex and fragment shaders that I wrote to have a 3X3 filter running full resolution at 60fps.

// Here are three examples of filters that can have a blur effect
// Box (low-pass) kernel: all nine taps carry the same weight.
// Entries are listed left to right, top to bottom.
// The weights sum to 9 * 0.2222 = 1.9998, i.e. just under 2.0; dividing the
// filtered result by the inverse gain below yields an overall gain of ~1.0.
const float Filter3X3LowPass[9] = {
    0.2222f, 0.2222f, 0.2222f,
    0.2222f, 0.2222f, 0.2222f,
    0.2222f, 0.2222f, 0.2222f
};

// Divisor paired with Filter3X3LowPass
// (presumably supplied to the fragment shader as u_f_inverse_gain).
const float Filter3X3LowPassInverseGain = 2.0f;

// Gaussian-like kernel: heaviest weight at the centre, half of that on the
// edges, a quarter of it on the corners.
// Entries are listed left to right, top to bottom.
// The weights sum to 4 * 0.125 + 4 * 0.25 + 0.5 = 2.0; dividing the filtered
// result by the inverse gain below yields an overall gain of 1.0.
const float Filter3X3Gauss[9] = {
    0.125f, 0.25f, 0.125f,
    0.25f,  0.5f,  0.25f,
    0.125f, 0.25f, 0.125f
};

// Divisor paired with Filter3X3Gauss
// (presumably supplied to the fragment shader as u_f_inverse_gain).
const float Filter3X3GaussInverseGain = 2.0f;

// Diagonal blur kernel: only the four corners and the centre contribute.
// Entries are listed left to right, top to bottom.
// The weights sum to 5 * 0.4 = 2.0; dividing the filtered result by the
// inverse gain below yields an overall gain of 1.0.
const float Filter3X3Blur[9] = {
    0.4f, 0.0f, 0.4f,
    0.0f, 0.4f, 0.0f,
    0.4f, 0.0f, 0.4f
};

// Divisor paired with Filter3X3Blur
// (presumably supplied to the fragment shader as u_f_inverse_gain).
const float Filter3X3BlurInverseGain = 2.0f;

//
// Filter3X3.fsh (Fragment Shader)
//
// Created by Ryan Steder on 11/9/11.
// Copyright 2011 Ryan Steder. All rights reserved.
//
// 3X3 Filter using OpenGL ES 2.0, for use with iPhone, iPod Touch, iPad
// optimized for realtime, 60fps, fullscreen rendering
// NOTE: this could be changed to use a 5X5 filter
// but would likely not run at 60fps fullscreen, but
// most likely possible to apply 5X5 at 30fps fullscreen
//

// Precision used for the texture-coordinate varyings.
// lowp only guarantees a step of 2^-8 (1/256) over the range (-2.0, 2.0),
// which is too coarse to represent the one-pixel offsets carried by the
// varyings (e.g. 1/320 and 1/480), so neighbouring taps could end up
// sampling the same texel. Use highp when the fragment stage supports it
// and fall back to mediump (relative precision 2^-10) otherwise.
// Varying precision does not have to match the vertex shader's declaration.
#ifdef GL_FRAGMENT_PRECISION_HIGH
#define TEXCOORD_PRECISION highp
#else
#define TEXCOORD_PRECISION mediump
#endif

// the source texture; its min and mag filters should be linear
uniform lowp sampler2D u_s_source;

// the positive filter values are to sum to <= 2.0 and the negative
// filter values are to sum to >= -2.0
// this keeps the weighted sum inside the low precision range [-2.0, 2.0]
// and gives finer grained filter calculations in real time
uniform lowp mat3 u_m3_filter;

// at the end of the filter calculation the sum is divided by the inverse gain
// the inverse gain will likely be 2.0, to accomplish a sum of 1.0
// the inverse gain must be non zero
uniform lowp float u_f_inverse_gain;

// transparency will be applied to the entire render
// alpha is used for blending purposes
uniform lowp float u_f_alpha;

// texture coordinates of the nine filter taps, set by the vertex shader
varying TEXCOORD_PRECISION vec2 v_v2_C; // Center
varying TEXCOORD_PRECISION vec2 v_v2_TL; // Top Left
varying TEXCOORD_PRECISION vec2 v_v2_TR; // Top Right
varying TEXCOORD_PRECISION vec2 v_v2_BL; // Bottom Left
varying TEXCOORD_PRECISION vec2 v_v2_BR; // Bottom Right
varying TEXCOORD_PRECISION vec2 v_v2_L; // Left
varying TEXCOORD_PRECISION vec2 v_v2_R; // Right
varying TEXCOORD_PRECISION vec4 v_v4_TB; // Top is .xy, Bottom is .zw
void main()
{
    // Fetch the colour (rgb only) of each tap in the 3x3 neighbourhood.
    // Top and Bottom share one vec4 varying and are read through swizzles.
    lowp vec3 v3_top_left     = texture2D(u_s_source, v_v2_TL).rgb;
    lowp vec3 v3_top          = texture2D(u_s_source, v_v4_TB.xy).rgb;
    lowp vec3 v3_top_right    = texture2D(u_s_source, v_v2_TR).rgb;
    lowp vec3 v3_left         = texture2D(u_s_source, v_v2_L).rgb;
    lowp vec3 v3_center       = texture2D(u_s_source, v_v2_C).rgb;
    lowp vec3 v3_right        = texture2D(u_s_source, v_v2_R).rgb;
    lowp vec3 v3_bottom_left  = texture2D(u_s_source, v_v2_BL).rgb;
    lowp vec3 v3_bottom       = texture2D(u_s_source, v_v4_TB.zw).rgb;
    lowp vec3 v3_bottom_right = texture2D(u_s_source, v_v2_BR).rgb;

    // Weighted sum, accumulated left to right, top to bottom.
    // The first index of u_m3_filter is used as the filter row here:
    //   [0] = Top Left,    Top,    Top Right
    //   [1] = Left,        Center, Right
    //   [2] = Bottom Left, Bottom, Bottom Right
    // NOTE(review): GLSL's first matrix index selects a column, so this
    // presumably relies on the host uploading the 9 floats untransposed
    // in the order listed above - confirm against the upload code.
    lowp vec3 v3_weighted = vec3(0.0);
    v3_weighted += u_m3_filter[0][0] * v3_top_left;
    v3_weighted += u_m3_filter[0][1] * v3_top;
    v3_weighted += u_m3_filter[0][2] * v3_top_right;
    v3_weighted += u_m3_filter[1][0] * v3_left;
    v3_weighted += u_m3_filter[1][1] * v3_center;
    v3_weighted += u_m3_filter[1][2] * v3_right;
    v3_weighted += u_m3_filter[2][0] * v3_bottom_left;
    v3_weighted += u_m3_filter[2][1] * v3_bottom;
    v3_weighted += u_m3_filter[2][2] * v3_bottom_right;

    // The gain is supplied alongside the filter instead of being
    // computed in the shader.
    v3_weighted /= u_f_inverse_gain;

    // Colour and alpha are both clamped to [0.0, 1.0]; the alpha comes
    // straight from the uniform and exists for blending purposes.
    gl_FragColor = vec4(clamp(v3_weighted, 0.0, 1.0),
                        clamp(u_f_alpha, 0.0, 1.0));
}

//
// Filter3X3.vsh (Vertex Shader)
//
// Created by Ryan Steder on 11/9/11.
// Copyright 2011 Ryan Steder. All rights reserved.
//
// 3X3 Filter using OpenGL ES 2.0, for use with iPhone, iPod Touch, iPad
// optimized for realtime, 60fps, fullscreen rendering
// NOTE: this could be changed to use a 5X5 filter
// but would likely not run at 60fps fullscreen, but
// most likely possible to apply 5X5 at 30fps fullscreen
//

// Per-vertex inputs: clip-space position and the texture coordinate
// of the center tap.
attribute highp vec4 a_v4_position;
attribute highp vec2 a_v2_coordinate;

// Offset, in texture coordinates, that moves a lookup one pixel
// away from the center.
// On iPhone this should be vec2(1.0 / 320.0, 1.0 / 480.0).
uniform highp vec2 u_v2_pixel_unit;

// Texture coordinates of the nine filter taps.
// Only 8 varying vectors are used for 9 taps, so Top and Bottom are
// packed into a single vec4. Passing the other taps as separate vec2
// varyings keeps dependent texture reads to a minimum for a 3X3 filter.
// NOTE: swizzled coordinates (the packed Top/Bottom pair) count as
// dependent texture reads.
varying highp vec2 v_v2_C;  // Center
varying highp vec2 v_v2_TL; // Top Left
varying highp vec2 v_v2_TR; // Top Right
varying highp vec2 v_v2_BL; // Bottom Left
varying highp vec2 v_v2_BR; // Bottom Right
varying highp vec2 v_v2_L;  // Left
varying highp vec2 v_v2_R;  // Right
varying highp vec4 v_v4_TB; // Top is .xy, Bottom is .zw
void main()
{
    // Compute every texture lookup coordinate here and hand it to the
    // fragment shader through the 8 varying vectors.
    // Setting these separately reduces dependent texture reads,
    // which greatly improves performance.
    //
    // All subtractions use the ASCII '-' operator; a typographic dash
    // is not a valid GLSL token and stops the shader from compiling.

    // Coordinates one pixel unit away from the center on each axis.
    // "Top" is +y and "Bottom" is -y in texture coordinates.
    float f_left   = a_v2_coordinate.x - u_v2_pixel_unit.x;
    float f_right  = a_v2_coordinate.x + u_v2_pixel_unit.x;
    float f_top    = a_v2_coordinate.y + u_v2_pixel_unit.y;
    float f_bottom = a_v2_coordinate.y - u_v2_pixel_unit.y;

    v_v2_C  = a_v2_coordinate;
    v_v2_TL = vec2(f_left,  f_top);
    v_v2_TR = vec2(f_right, f_top);
    v_v2_BL = vec2(f_left,  f_bottom);
    v_v2_BR = vec2(f_right, f_bottom);
    v_v2_L  = vec2(f_left,  a_v2_coordinate.y);
    v_v2_R  = vec2(f_right, a_v2_coordinate.y);

    // Top goes in .xy and Bottom in .zw of the shared vec4.
    v_v4_TB = vec4(a_v2_coordinate.x, f_top,
                   a_v2_coordinate.x, f_bottom);

    // The vertex position is passed through unchanged.
    gl_Position = a_v4_position;
}