Merge pull request #62 from mbellew/sse

SSE version of PresetOutputs::PerPixelMath
This commit is contained in:
Mischa Spiegelmock
2018-05-23 10:11:22 +03:00
committed by GitHub
10 changed files with 472 additions and 324 deletions

View File

@ -11,6 +11,8 @@ AX_CHECK_GL
AC_CHECK_LIB(c, dlopen, LIBDL="", AC_CHECK_LIB(dl, dlopen, LIBDL="-ldl"))
AC_CHECK_FUNCS_ONCE([aligned_alloc posix_memalign])
AC_CONFIG_HEADERS([config.h])
AC_CONFIG_FILES([
Makefile

View File

@ -65,7 +65,7 @@ Param::Param(std::string _name) :
matrix(0)
{
engine_val = new float();
engine_val = (float *)&local_value;
default_init_val.float_val = DEFAULT_DOUBLE_IV;
upper_bound.float_val = DEFAULT_DOUBLE_UB;
@ -73,18 +73,10 @@ Param::Param(std::string _name) :
/// @note may have fixed a recent bug. testing
*((float*)engine_val) = default_init_val.float_val;
}
}
/* Frees a parameter type */
Param::~Param() {
// I hate this, but will let it be for now
if (flags & P_FLAG_USERDEF) {
delete((double*)engine_val);
}
if (PARAM_DEBUG) printf("~Param: freeing \"%s\".\n", name.c_str());
}

View File

@ -58,6 +58,8 @@ class InitCond;
class Param;
class Preset;
//#include <map>
#include <immintrin.h>
/* Parameter Type */
class Param {
@ -72,6 +74,9 @@ public:
CValue upper_bound; /* this parameter's upper bound */
CValue lower_bound; /* this parameter's lower bound */
// for a local variable, engine_val can point here
float local_value;
/// Create a new parameter
Param(std::string name, short int type, short int flags,
void * eqn_val, void *matrix,

View File

@ -5,11 +5,14 @@
#include <iostream>
#include <cmath>
#include "Renderer/BeatDetect.hpp"
#include <immintrin.h>
PresetInputs::PresetInputs() : PipelineContext()
{
}
void PresetInputs::update(const BeatDetect & music, const PipelineContext & context) {
// Reflect new values from the beat detection unit
@ -28,12 +31,38 @@ void PresetInputs::update(const BeatDetect & music, const PipelineContext & cont
this->progress = context.progress;
}
float **alloc_mesh(size_t gx, size_t gy)
{
// round gy up to multiple 4 (for possible SSE optimization)
gy = (gy+3) & ~(size_t)3;
float **mesh = (float **)wipe_aligned_alloc(gx * sizeof(float *));
float *m = (float *)wipe_aligned_alloc(gx * gy * sizeof(float));
for ( int x = 0; x < gx; x++ )
mesh[x] = m + (gy * x);
return mesh;
}
/**
 * Releases a mesh created by alloc_mesh().
 *
 * @param mesh row-pointer table returned by alloc_mesh(); NULL is accepted
 *             and ignored, so a mesh that was never allocated is safe to pass
 * @return always NULL, so callers can write `m = free_mesh(m);`
 */
float **free_mesh(float **mesh)
{
    if (mesh != NULL)
    {
        wipe_aligned_free(mesh[0]);   // the single buffer that holds every row
        wipe_aligned_free(mesh);      // the row-pointer table itself
    }
    return NULL;
}
/**
 * Copies the first gy floats of each of the gx rows of `src` into `dst`.
 *
 * Rows are copied one at a time through the row pointers rather than as one
 * gx*gy block: alloc_mesh() pads each row to a multiple of 4 floats, so the
 * rows are not packed back to back unless gy is already a multiple of 4, and
 * a single block copy would miss the tail of the data.
 *
 * @param dst destination mesh (row-pointer table)
 * @param src source mesh (row-pointer table)
 * @param gx  number of rows
 * @param gy  number of floats to copy from each row
 */
void copy_mesh(float **dst, float **src, int gx, int gy)
{
    if (dst == NULL || src == NULL || gx <= 0 || gy <= 0)
        return;

    const size_t row_bytes = (size_t)gy * sizeof(float);
    for (int x = 0; x < gx; x++)
        memcpy(dst[x], src[x], row_bytes);
}
void PresetInputs::Initialize ( int gx, int gy )
{
int x, y;
this->gx =gx;
this->gy= gy;
this->gx = gx;
this->gy = gy;
/// @bug no clue if this block belongs here
@ -47,47 +76,14 @@ void PresetInputs::Initialize ( int gx, int gy )
ang_per_pixel = 0;
// ***
this->x_mesh= ( float ** ) wipemalloc ( gx * sizeof ( float * ) );
for ( x = 0; x < gx; x++ )
{
this->x_mesh[x] = ( float * ) wipemalloc ( gy * sizeof ( float ) );
}
this->y_mesh= ( float ** ) wipemalloc ( gx * sizeof ( float * ) );
for ( x = 0; x <gx; x++ )
{
this->y_mesh[x] = ( float * ) wipemalloc ( gy * sizeof ( float ) );
}
this->rad_mesh= ( float ** ) wipemalloc ( gx * sizeof ( float * ) );
for ( x = 0; x < gx; x++ )
{
this->rad_mesh[x] = ( float * ) wipemalloc ( gy * sizeof ( float ) );
}
this->theta_mesh= ( float ** ) wipemalloc ( gx * sizeof ( float * ) );
for ( x = 0; x <gx; x++ )
{
this->theta_mesh[x] = ( float * ) wipemalloc ( gy * sizeof ( float ) );
}
this->origtheta= ( float ** ) wipemalloc ( gx * sizeof ( float * ) );
for ( x = 0; x < gx; x++ )
{
this->origtheta[x] = ( float * ) wipemalloc ( gy * sizeof ( float ) );
}
this->origrad= ( float ** ) wipemalloc ( gx * sizeof ( float * ) );
for ( x = 0; x < gx; x++ )
{
this->origrad[x] = ( float * ) wipemalloc ( gy * sizeof ( float ) );
}
this->origx= ( float ** ) wipemalloc ( gx * sizeof ( float * ) );
for ( x = 0; x < gx; x++ )
{
this->origx[x] = ( float * ) wipemalloc ( gy * sizeof ( float ) );
}
this->origy= ( float ** ) wipemalloc ( gx * sizeof ( float * ) );
for ( x = 0; x < gx; x++ )
{
this->origy[x] = ( float * ) wipemalloc ( gy * sizeof ( float ) );
}
this->x_mesh = alloc_mesh(gx, gy);
this->y_mesh = alloc_mesh(gx, gy);
this->rad_mesh = alloc_mesh(gx, gy);
this->theta_mesh= alloc_mesh(gx, gy);
this->origtheta = alloc_mesh(gx, gy);
this->origrad = alloc_mesh(gx, gy);
this->origx = alloc_mesh(gx, gy);
this->origy = alloc_mesh(gx, gy);
for ( x=0;x<gx;x++ )
{
@ -99,52 +95,33 @@ void PresetInputs::Initialize ( int gx, int gy )
this->origtheta[x][y]=atan2 ( ( ( this->origy[x][y]-.5 ) *2 ), ( ( this->origx[x][y]-.5 ) *2 ) );
}
}
}
PresetOutputs::PresetOutputs() : Pipeline()
{}
PresetOutputs::~PresetOutputs()
{
assert(this->gx > 0);
for ( int x = 0; x < this->gx; x++ )
{
free(this->sx_mesh[x]);
free(this->sy_mesh[x]);
free(this->dy_mesh[x]);
free(this->dx_mesh[x]);
free(this->cy_mesh[x]);
free(this->cx_mesh[x]);
free(this->warp_mesh[x]);
free(this->zoom_mesh[x]);
free(this->zoomexp_mesh[x]);
free(this->rot_mesh[x]);
free(this->orig_x[x]);
free(this->orig_y[x]);
free(this->rad_mesh[x]);
}
free(this->rad_mesh);
free(this->sx_mesh);
free(this->sy_mesh);
free(this->dy_mesh);
free(this->dx_mesh);
free(this->cy_mesh);
free(this->cx_mesh);
free(this->warp_mesh);
free(this->zoom_mesh);
free(this->zoomexp_mesh);
free(this->rot_mesh);
free(this->orig_x);
free(this->orig_y);
this->rad_mesh = free_mesh(this->rad_mesh);
this->sx_mesh = free_mesh(this->sx_mesh);
this->sy_mesh = free_mesh(this->sy_mesh);
this->dy_mesh = free_mesh(this->dy_mesh);
this->dx_mesh = free_mesh(this->dx_mesh);
this->cy_mesh = free_mesh(this->cy_mesh);
this->cx_mesh = free_mesh(this->cx_mesh);
this->warp_mesh = free_mesh(this->warp_mesh);
this->zoom_mesh = free_mesh(this->zoom_mesh);
this->zoomexp_mesh = free_mesh(this->zoomexp_mesh);
this->rot_mesh = free_mesh(this->rot_mesh);
this->orig_x = free_mesh(this->orig_x);
this->orig_y = free_mesh(this->orig_y);
}
void PresetOutputs::Render(const BeatDetect &music, const PipelineContext &context)
{
PerPixelMath(context);
@ -155,18 +132,22 @@ void PresetOutputs::Render(const BeatDetect &music, const PipelineContext &conte
for (PresetOutputs::cshape_container::iterator pos = customShapes.begin();
pos != customShapes.end(); ++pos)
{
if( (*pos)->enabled==1) drawables.push_back((*pos));
}
{
if ((*pos)->enabled==1)
drawables.push_back((*pos));
}
for (PresetOutputs::cwave_container::iterator pos = customWaves.begin();
pos != customWaves.end(); ++pos)
{
if( (*pos)->enabled==1) drawables.push_back((*pos));
}
{
if ((*pos)->enabled==1)
drawables.push_back((*pos));
}
drawables.push_back(&wave);
if(bDarkenCenter==1) drawables.push_back(&darkenCenter);
drawables.push_back(&wave);
if (bDarkenCenter==1)
drawables.push_back(&darkenCenter);
drawables.push_back(&border);
compositeDrawables.clear();
@ -185,257 +166,358 @@ void PresetOutputs::Render(const BeatDetect &music, const PipelineContext &conte
compositeDrawables.push_back(&invert);
}
// N.B. The more optimization that can be done on this method, the better! This is called a lot and can probably be improved.
void PresetOutputs::PerPixelMath(const PipelineContext &context)
void PresetOutputs::PerPixelMath_c(const PipelineContext &context)
{
int x, y;
float fZoom2, fZoom2Inv;
for (x = 0; x < gx; x++)
for (int x = 0; x < gx; x++)
{
for (y = 0; y < gy; y++)
for (int y = 0; y < gy; y++)
{
fZoom2 = std::pow(this->zoom_mesh[x][y], std::pow(this->zoomexp_mesh[x][y],
const float fZoom2 = std::pow(this->zoom_mesh[x][y], std::pow(this->zoomexp_mesh[x][y],
rad_mesh[x][y] * 2.0f - 1.0f));
fZoom2Inv = 1.0f / fZoom2;
const float fZoom2Inv = 1.0f / fZoom2;
this->x_mesh[x][y] = this->orig_x[x][y] * 0.5f * fZoom2Inv + 0.5f;
this->x_mesh[x][y] = (this->x_mesh[x][y] - this->cx_mesh[x][y]) / this->sx_mesh[x][y] + this->cx_mesh[x][y];
this->y_mesh[x][y] = this->orig_y[x][y] * 0.5f * fZoom2Inv + 0.5f;
this->y_mesh[x][y] = (this->y_mesh[x][y] - this->cy_mesh[x][y]) / this->sy_mesh[x][y] + this->cy_mesh[x][y];
}
}
for (x = 0; x < gx; x++)
{
for (y = 0; y < gy; y++)
{
this->x_mesh[x][y] = (this->x_mesh[x][y] - this->cx_mesh[x][y])
/ this->sx_mesh[x][y] + this->cx_mesh[x][y];
}
}
for (x = 0; x < gx; x++)
{
for (y = 0; y < gy; y++)
{
this->y_mesh[x][y] = (this->y_mesh[x][y] - this->cy_mesh[x][y])
/ this->sy_mesh[x][y] + this->cy_mesh[x][y];
}
}
float fWarpTime = context.time * this->fWarpAnimSpeed;
float fWarpScaleInv = 1.0f / this->fWarpScale;
const float fWarpTime = context.time * this->fWarpAnimSpeed;
const float fWarpScaleInv = 1.0f / this->fWarpScale;
float f[4];
f[0] = 11.68f + 4.0f * cosf(fWarpTime * 1.413f + 10);
f[1] = 8.77f + 3.0f * cosf(fWarpTime * 1.113f + 7);
f[2] = 10.54f + 3.0f * cosf(fWarpTime * 1.233f + 3);
f[3] = 11.49f + 4.0f * cosf(fWarpTime * 0.933f + 5);
for (x = 0; x < gx; x++)
for (int x = 0; x < gx; x++)
{
for (y = 0; y < gy; y++)
for (int y = 0; y < gy; y++)
{
this->x_mesh[x][y] += this->warp_mesh[x][y] * 0.0035f * sinf(fWarpTime * 0.333f
+ fWarpScaleInv * (this->orig_x[x][y] * f[0] - this->orig_y[x][y] * f[3]));
this->y_mesh[x][y] += this->warp_mesh[x][y] * 0.0035f * cosf(fWarpTime * 0.375f
- fWarpScaleInv * (this->orig_x[x][y] * f[2] + this->orig_y[x][y] * f[1]));
this->x_mesh[x][y] += this->warp_mesh[x][y] * 0.0035f * cosf(fWarpTime * 0.753f
- fWarpScaleInv * (this->orig_x[x][y] * f[1] - this->orig_y[x][y] * f[2]));
this->y_mesh[x][y] += this->warp_mesh[x][y] * 0.0035f * sinf(fWarpTime * 0.825f
+ fWarpScaleInv * (this->orig_x[x][y] * f[0] + this->orig_y[x][y] * f[3]));
}
}
for (x = 0; x < gx; x++)
{
for (y = 0; y < gy; y++)
{
float u2 = this->x_mesh[x][y] - this->cx_mesh[x][y];
float v2 = this->y_mesh[x][y] - this->cy_mesh[x][y];
const float orig_x = this->orig_x[x][y];
const float orig_y = this->orig_y[x][y];
const float warp_mesh = this->warp_mesh[x][y] * 0.0035f;
float cos_rot = cosf(this->rot_mesh[x][y]);
float sin_rot = sinf(this->rot_mesh[x][y]);
this->x_mesh[x][y] = u2 * cos_rot - v2 * sin_rot + this->cx_mesh[x][y];
this->y_mesh[x][y] = u2 * sin_rot + v2 * cos_rot + this->cy_mesh[x][y];
this->x_mesh[x][y] +=
(warp_mesh * sinf(fWarpTime * 0.333f + fWarpScaleInv * (orig_x * f[0] - orig_y * f[3]))) +
(warp_mesh * cosf(fWarpTime * 0.753f - fWarpScaleInv * (orig_x * f[1] - orig_y * f[2])));
this->y_mesh[x][y] +=
(warp_mesh * cosf(fWarpTime * 0.375f - fWarpScaleInv * (orig_x * f[2] + orig_y * f[1]))) +
(warp_mesh * sinf(fWarpTime * 0.825f + fWarpScaleInv * (orig_x * f[0] + orig_y * f[3])));
}
}
for (x = 0; x < gx; x++)
for (y = 0; y < gy; y++)
this->x_mesh[x][y] -= this->dx_mesh[x][y];
for (int x = 0; x < gx; x++)
{
for (int y = 0; y < gy; y++)
{
const float u2 = this->x_mesh[x][y] - this->cx_mesh[x][y];
const float v2 = this->y_mesh[x][y] - this->cy_mesh[x][y];
for (x = 0; x < gx; x++)
for (y = 0; y < gy; y++)
this->y_mesh[x][y] -= this->dy_mesh[x][y];
const float rot = this->rot_mesh[x][y];
const float cos_rot = cosf(rot);
const float sin_rot = sinf(rot);
this->x_mesh[x][y] = u2 * cos_rot - v2 * sin_rot + this->cx_mesh[x][y] - this->dx_mesh[x][y];
this->y_mesh[x][y] = u2 * sin_rot + v2 * cos_rot + this->cy_mesh[x][y] - this->dy_mesh[x][y];
}
}
}
#ifdef __SSE2__
// is there an SSE way to do this?
/**
 * Lane-wise power: returns { x[i] raised to y[i] } for the four float lanes.
 *
 * SSE has no pow instruction, so the lanes are spilled to memory, computed
 * with the scalar powf builtin and reloaded.
 *
 * @param x bases
 * @param y exponents
 */
inline __m128 _mm_pow(__m128 x, __m128 y)
{
    // plain float[4] is only guaranteed 4-byte alignment, so use the
    // unaligned store/load forms for the scratch arrays
    float X[4];
    float Y[4];
    _mm_storeu_ps(X, x);
    _mm_storeu_ps(Y, y);
    for (int i = 0; i < 4; i++)
        X[i] = __builtin_powf(X[i], Y[i]);
    return _mm_loadu_ps(X);
}
/**
 * Lane-wise sine and cosine of the four floats in `x`, computed with the
 * scalar sinf()/cosf() on each lane.
 *
 * @param x    input angles
 * @param sinx receives sinf() of each lane
 * @param cosx receives cosf() of each lane
 */
inline void _mm_sincosf(__m128 x, __m128 &sinx, __m128 &cosx)
{
    // scratch arrays are not guaranteed to be 16-byte aligned, so use the
    // unaligned store/load forms
    float X[4], S[4], C[4];
    _mm_storeu_ps(X, x);
    for (int i = 0; i < 4; i++)
    {
        S[i] = sinf(X[i]);
        C[i] = cosf(X[i]);
    }
    sinx = _mm_loadu_ps(S);
    cosx = _mm_loadu_ps(C);
}
/**
 * Lane-wise sine: returns sinf() of each of the four floats in `x`,
 * computed with the scalar library function.
 */
inline __m128 _mm_sinf(__m128 x)
{
    // the scratch array is not guaranteed to be 16-byte aligned, so use the
    // unaligned store/load forms
    float X[4];
    _mm_storeu_ps(X, x);
    for (int i = 0; i < 4; i++)
        X[i] = sinf(X[i]);
    return _mm_loadu_ps(X);
}
/**
 * Lane-wise cosine: returns cosf() of each of the four floats in `x`,
 * computed with the scalar library function.
 */
inline __m128 _mm_cosf(__m128 x)
{
    // the scratch array is not guaranteed to be 16-byte aligned, so use the
    // unaligned store/load forms
    float X[4];
    _mm_storeu_ps(X, x);
    for (int i = 0; i < 4; i++)
        X[i] = cosf(X[i]);
    return _mm_loadu_ps(X);
}
// SSE implementation of the per-pixel mesh math. Each inner-loop iteration
// handles four consecutive y entries of one mesh row; the scalar formula being
// vectorized is kept as a comment above each intrinsic expression.
//
// The work is done in three passes over the gx-by-gy grid:
//   1. zoom, then scale about the (cx, cy) centre  -> writes x_mesh / y_mesh
//   2. time-animated warp                          -> adds to x_mesh / y_mesh
//   3. rotation about (cx, cy), then (dx, dy) shift -> rewrites x_mesh / y_mesh
//
// All mesh accesses use the aligned _mm_load_ps/_mm_store_ps forms and the
// y loops step by 4 without a scalar tail, so every row must start on a
// 16-byte boundary and be addressable up to the next multiple of 4 floats.
// NOTE(review): this presumably relies on the meshes coming from alloc_mesh()
// (which pads rows to a multiple of 4 floats) — confirm for every mesh read
// or written here.
void PresetOutputs::PerPixelMath_sse(const PipelineContext &context)
{
// pass 1: zoom and scale
for (int x = 0; x < gx; x++)
{
for (int y = 0; y < gy; y += 4)
{
// fZoom2 = std::pow(this->zoom_mesh[x][y], std::pow(this->zoomexp_mesh[x][y],
// rad_mesh[x][y] * 2.0f - 1.0f));
__m128 rad_mesh_scaled =
_mm_sub_ps(
_mm_mul_ps(
_mm_load_ps(&this->rad_mesh[x][y]),
_mm_set_ps1(2.0f)),
_mm_set_ps1(1.0f));
__m128 zoom_mesh = _mm_load_ps(&this->zoom_mesh[x][y]);
__m128 zoomexp_mesh = _mm_load_ps(&this->zoomexp_mesh[x][y]);
__m128 fZoom2 = _mm_pow(zoom_mesh, _mm_pow(zoomexp_mesh, rad_mesh_scaled));
// fZoom2Inv = 1.0f / fZoom2;
// NOTE(review): _mm_rcp_ps is an approximate reciprocal, so this is not
// bit-identical to the scalar 1.0f / fZoom2 shown above.
__m128 fZoomInv = _mm_rcp_ps(fZoom2);
// this->x_mesh[x][y] = this->orig_x[x][y] * 0.5f * fZoom2Inv + 0.5f;
__m128 x_mesh =
_mm_add_ps(
_mm_mul_ps(
_mm_load_ps(&this->orig_x[x][y]),
_mm_mul_ps(fZoomInv,_mm_set_ps1(0.5f))), // CONSIDER: common sub-expression
_mm_set_ps1(0.5f));
// this->x_mesh[x][y] = (this->x_mesh[x][y] - this->cx_mesh[x][y]) / this->sx_mesh[x][y] + this->cx_mesh[x][y];
__m128 cx_mesh = _mm_load_ps(&this->cx_mesh[x][y]);
__m128 sx_mesh = _mm_load_ps(&this->sx_mesh[x][y]);
_mm_store_ps(&this->x_mesh[x][y],
_mm_add_ps(
_mm_div_ps(
_mm_sub_ps(x_mesh,cx_mesh),
sx_mesh),
cx_mesh
));
// this->y_mesh[x][y] = this->orig_y[x][y] * 0.5f * fZoom2Inv + 0.5f;
__m128 y_mesh =
_mm_add_ps(
_mm_mul_ps(
_mm_load_ps(&this->orig_y[x][y]),
_mm_mul_ps(fZoomInv,_mm_set_ps1(0.5f))),
_mm_set_ps1(0.5f));
// this->y_mesh[x][y] = (this->y_mesh[x][y] - this->cy_mesh[x][y]) / this->sy_mesh[x][y] + this->cy_mesh[x][y];
__m128 cy_mesh = _mm_load_ps(&this->cy_mesh[x][y]);
__m128 sy_mesh = _mm_load_ps(&this->sy_mesh[x][y]);
_mm_store_ps(&this->y_mesh[x][y],
_mm_add_ps(
_mm_div_ps(
_mm_sub_ps(y_mesh,cy_mesh),
sy_mesh),
cy_mesh
));
}
}
// warp parameters: computed once per call from context.time, shared by every grid point
const float fWarpTime = context.time * this->fWarpAnimSpeed;
const float fWarpScaleInv = 1.0f / this->fWarpScale;
const float f[4] =
{
11.68f + 4.0f * cosf(fWarpTime * 1.413f + 10),
8.77f + 3.0f * cosf(fWarpTime * 1.113f + 7),
10.54f + 3.0f * cosf(fWarpTime * 1.233f + 3),
11.49f + 4.0f * cosf(fWarpTime * 0.933f + 5)
};
// pass 2: warp
for (int x = 0; x < gx; x++)
{
for (int y = 0; y < gy; y+=4)
{
//float orig_x = this->orig_x[x][y];
//float orig_y = this->orig_y[x][y];
//float warp_mesh = this->warp_mesh[x][y] * 0.0035f;
const __m128 orig_x = _mm_load_ps(&this->orig_x[x][y]);
const __m128 orig_y = _mm_load_ps(&this->orig_y[x][y]);
const __m128 warp_mesh = _mm_mul_ps(_mm_load_ps(&this->warp_mesh[x][y]), _mm_set_ps1(0.0035f));
// this->x_mesh[x][y] +=
// (warp_mesh * sinf(fWarpTime * 0.333f + fWarpScaleInv * (orig_x * f[0] - orig_y * f[3]))) +
// (warp_mesh * cosf(fWarpTime * 0.753f - fWarpScaleInv * (orig_x * f[1] - orig_y * f[2])));
_mm_store_ps(&this->x_mesh[x][y],
_mm_add_ps(_mm_load_ps(&this->x_mesh[x][y]),
_mm_add_ps(
_mm_mul_ps(warp_mesh, _mm_sinf(
_mm_add_ps(
_mm_set_ps1(fWarpTime*0.333f),
_mm_mul_ps(_mm_set_ps1(fWarpScaleInv),
_mm_sub_ps(
_mm_mul_ps(orig_x, _mm_set_ps1(f[0])),
_mm_mul_ps(orig_y, _mm_set_ps1(f[3]))
))))),
_mm_mul_ps(warp_mesh, _mm_cosf(
_mm_sub_ps(
_mm_set_ps1(fWarpTime*0.753f),
_mm_mul_ps(_mm_set_ps1(fWarpScaleInv),
_mm_sub_ps(
_mm_mul_ps(orig_x, _mm_set_ps1(f[1])),
_mm_mul_ps(orig_y, _mm_set_ps1(f[2]))
))))))));
// this->y_mesh[x][y] +=
// (warp_mesh * cosf(fWarpTime * 0.375f - fWarpScaleInv * (orig_x * f[2] + orig_y * f[1]))) +
// (warp_mesh * sinf(fWarpTime * 0.825f + fWarpScaleInv * (orig_x * f[0] + orig_y * f[3])));
_mm_store_ps(&this->y_mesh[x][y],
_mm_add_ps(_mm_load_ps(&this->y_mesh[x][y]),
_mm_add_ps(
_mm_mul_ps(warp_mesh, _mm_cosf(
_mm_sub_ps(
_mm_set_ps1(fWarpTime*0.375f),
_mm_mul_ps(_mm_set_ps1(fWarpScaleInv),
_mm_add_ps(
_mm_mul_ps(orig_x, _mm_set_ps1(f[2])),
_mm_mul_ps(orig_y, _mm_set_ps1(f[1]))
))))),
_mm_mul_ps(warp_mesh, _mm_sinf(
_mm_add_ps(
_mm_set_ps1(fWarpTime*0.825f),
_mm_mul_ps(_mm_set_ps1(fWarpScaleInv),
_mm_add_ps(
_mm_mul_ps(orig_x, _mm_set_ps1(f[0])),
_mm_mul_ps(orig_y, _mm_set_ps1(f[3]))
))))))));
}
}
// pass 3: rotation about (cx, cy) followed by the (dx, dy) translation
for (int x = 0; x < gx; x++)
{
for (int y = 0; y < gy; y+=4)
{
// const float u2 = this->x_mesh[x][y] - this->cx_mesh[x][y];
// const float v2 = this->y_mesh[x][y] - this->cy_mesh[x][y];
const __m128 u2 = _mm_sub_ps(_mm_load_ps(&this->x_mesh[x][y]),_mm_load_ps(&this->cx_mesh[x][y]));
const __m128 v2 = _mm_sub_ps(_mm_load_ps(&this->y_mesh[x][y]),_mm_load_ps(&this->cy_mesh[x][y]));
// const float rot = this->rot_mesh[x][y];
// const float cos_rot = cosf(rot);
// const float sin_rot = sinf(rot);
__m128 sin_rot, cos_rot;
_mm_sincosf(_mm_load_ps(&this->rot_mesh[x][y]), sin_rot, cos_rot);
// this->x_mesh[x][y] = u2 * cos_rot - v2 * sin_rot + this->cx_mesh[x][y] - this->dx_mesh[x][y];
_mm_store_ps(&this->x_mesh[x][y],
_mm_add_ps(
_mm_sub_ps(_mm_mul_ps(u2, cos_rot), _mm_mul_ps(v2,sin_rot)),
_mm_sub_ps(_mm_load_ps(&this->cx_mesh[x][y]), _mm_load_ps(&this->dx_mesh[x][y]))
));
// this->y_mesh[x][y] = u2 * sin_rot + v2 * cos_rot + this->cy_mesh[x][y] - this->dy_mesh[x][y];
_mm_store_ps(&this->y_mesh[x][y],
_mm_add_ps(
_mm_add_ps(_mm_mul_ps(u2, sin_rot), _mm_mul_ps(v2,cos_rot)),
_mm_sub_ps(_mm_load_ps(&this->cy_mesh[x][y]), _mm_load_ps(&this->dy_mesh[x][y]))
));
}
}
}
#endif
// Runs the per-pixel mesh math for this frame. The implementation is chosen
// at compile time: the plain C++ version unless the compiler targets SSE2,
// in which case the SSE version is used.
void PresetOutputs::PerPixelMath(const PipelineContext &context)
{
#ifndef __SSE2__
    PerPixelMath_c(context);
#else
    PerPixelMath_sse(context);
#endif
}
void PresetOutputs::Initialize ( int gx, int gy )
{
assert(gx > 0);
this->gx = gx;
this->gy= gy;
this->gy = gy;
staticPerPixel = true;
setStaticPerPixel(gx,gy);
assert(this->gx > 0);
int x;
this->x_mesh= ( float ** ) wipemalloc ( gx * sizeof ( float * ) );
for ( x = 0; x < gx; x++ )
{
this->x_mesh[x] = ( float * ) wipemalloc ( gy * sizeof ( float ) );
}
this->y_mesh= ( float ** ) wipemalloc ( gx * sizeof ( float * ) );
for ( x = 0; x < gx; x++ )
{
this->y_mesh[x] = ( float * ) wipemalloc ( gy * sizeof ( float ) );
}
this->sx_mesh= ( float ** ) wipemalloc ( gx * sizeof ( float * ) );
for ( x = 0; x < gx; x++ )
{
this->sx_mesh[x] = ( float * ) wipemalloc ( gy * sizeof ( float ) );
}
this->sy_mesh= ( float ** ) wipemalloc ( gx * sizeof ( float * ) );
for ( x = 0; x < gx; x++ )
{
this->sy_mesh[x] = ( float * ) wipemalloc ( gy * sizeof ( float ) );
}
this->dx_mesh= ( float ** ) wipemalloc ( gx * sizeof ( float * ) );
for ( x = 0; x < gx; x++ )
{
this->dx_mesh[x] = ( float * ) wipemalloc ( gy * sizeof ( float ) );
}
this->dy_mesh= ( float ** ) wipemalloc ( gx * sizeof ( float * ) );
for ( x = 0; x < gx; x++ )
{
this->dy_mesh[x] = ( float * ) wipemalloc ( gy * sizeof ( float ) );
}
this->cx_mesh= ( float ** ) wipemalloc ( gx * sizeof ( float * ) );
for ( x = 0; x < gx; x++ )
{
this->cx_mesh[x] = ( float * ) wipemalloc ( gy * sizeof ( float ) );
}
this->cy_mesh= ( float ** ) wipemalloc ( gx * sizeof ( float * ) );
for ( x = 0; x < gx; x++ )
{
this->cy_mesh[x] = ( float * ) wipemalloc ( gy * sizeof ( float ) );
}
this->zoom_mesh= ( float ** ) wipemalloc ( gx * sizeof ( float * ) );
for ( x = 0; x < gx; x++ )
{
this->zoom_mesh[x] = ( float * ) wipemalloc ( gy * sizeof ( float ) );
}
this->zoomexp_mesh= ( float ** ) wipemalloc ( gx * sizeof ( float * ) );
for ( x = 0; x < gx; x++ )
{
this->zoomexp_mesh[x] = ( float * ) wipemalloc ( gy * sizeof ( float ) );
}
this->rot_mesh= ( float ** ) wipemalloc ( gx * sizeof ( float * ) );
for ( x = 0; x < gx; x++ )
{
this->rot_mesh[x] = ( float * ) wipemalloc ( gy * sizeof ( float ) );
}
this->x_mesh = alloc_mesh( gx, gy );
this->y_mesh = alloc_mesh( gx, gy );
this->sx_mesh = alloc_mesh( gx, gy );
this->sy_mesh = alloc_mesh( gx, gy );
this->dx_mesh = alloc_mesh( gx, gy );
this->dy_mesh = alloc_mesh( gx, gy );
this->cx_mesh = alloc_mesh( gx, gy );
this->cy_mesh = alloc_mesh( gx, gy );
this->zoom_mesh = alloc_mesh( gx, gy );
this->zoomexp_mesh = alloc_mesh( gx, gy );
this->rot_mesh = alloc_mesh( gx, gy );
this->warp_mesh= ( float ** ) wipemalloc ( gx * sizeof ( float * ) );
for ( x = 0; x < gx; x++ )
{
this->warp_mesh[x] = ( float * ) wipemalloc ( gy * sizeof ( float ) );
}
this->rad_mesh= ( float ** ) wipemalloc ( gx * sizeof ( float * ) );
for ( x = 0; x < gx; x++ )
{
this->rad_mesh[x] = ( float * ) wipemalloc ( gy * sizeof ( float ) );
}
this->orig_x = (float **) wipemalloc(gx * sizeof(float *));
this->warp_mesh = alloc_mesh( gx, gy );
this->rad_mesh = alloc_mesh( gx, gy );
this->orig_x = alloc_mesh( gx, gy );
this->orig_y = alloc_mesh( gx, gy );
//initialize reference grid values
for (x = 0; x < gx; x++)
{
this->orig_x[x] = (float *) wipemalloc(gy * sizeof(float));
}
this->orig_y = (float **) wipemalloc(gx * sizeof(float *));
for (x = 0; x < gx; x++)
{
this->orig_y[x] = (float *) wipemalloc(gy * sizeof(float));
}
//initialize reference grid values
for (x = 0; x < gx; x++)
for (int y = 0; y < gy; y++)
{
for (int y = 0; y < gy; y++)
{
float origx = x / (float) (gx - 1);
float origy = -((y / (float) (gy - 1)) - 1);
float origx = x / (float) (gx - 1);
float origy = -((y / (float) (gy - 1)) - 1);
rad_mesh[x][y]=hypot ( ( origx-.5 ) *2, ( origy-.5 ) *2 ) * .7071067;
orig_x[x][y] = (origx - .5) * 2;
orig_y[x][y] = (origy - .5) * 2;
}
rad_mesh[x][y]=hypot ( ( origx-.5 ) *2, ( origy-.5 ) *2 ) * .7071067;
orig_x[x][y] = (origx - .5) * 2;
orig_y[x][y] = (origy - .5) * 2;
}
}
}
PresetInputs::~PresetInputs()
{
for ( int x = 0; x < this->gx; x++ )
{
free ( this->origtheta[x] );
free ( this->origrad[x] );
free ( this->origx[x] );
free ( this->origy[x] );
free ( this->x_mesh[x] );
free ( this->y_mesh[x] );
free ( this->rad_mesh[x] );
free ( this->theta_mesh[x] );
}
free ( this->origx );
free ( this->origy );
free ( this->origrad );
free ( this->origtheta );
free ( this->x_mesh );
free ( this->y_mesh );
free ( this->rad_mesh );
free ( this->theta_mesh );
this->origx = NULL;
this->origy = NULL;
this->origtheta = NULL;
this->origrad = NULL;
this->x_mesh = NULL;
this->y_mesh = NULL;
this->rad_mesh = NULL;
this->theta_mesh = NULL;
this->origx = free_mesh ( this->origx );
this->origy = free_mesh ( this->origy );
this->origrad = free_mesh ( this->origrad );
this->origtheta = free_mesh ( this->origtheta );
this->x_mesh = free_mesh ( this->x_mesh );
this->y_mesh = free_mesh ( this->y_mesh );
this->rad_mesh = free_mesh ( this->rad_mesh );
this->theta_mesh = free_mesh ( this->theta_mesh );
}
void PresetInputs::resetMesh()
{
int x,y;
assert ( x_mesh );
assert ( y_mesh );
assert ( rad_mesh );
assert ( theta_mesh );
for ( x=0;x<this->gx;x++ )
{
for ( y=0;y<this->gy;y++ )
{
x_mesh[x][y]=this->origx[x][y];
y_mesh[x][y]=this->origy[x][y];
rad_mesh[x][y]=this->origrad[x][y];
theta_mesh[x][y]=this->origtheta[x][y];
}
}
copy_mesh(this->x_mesh, this->origx, gx, gy);
copy_mesh(this->y_mesh, this->origy, gx, gy);
copy_mesh(this->rad_mesh, this->origrad, gx, gy);
copy_mesh(this->theta_mesh, this->origtheta, gx, gy);
}

View File

@ -138,6 +138,12 @@ public:
float **orig_x; //original mesh
float **orig_y;
float **rad_mesh;
private:
void PerPixelMath_c( const PipelineContext &context);
#ifdef __SSE2__
void PerPixelMath_sse( const PipelineContext &context);
#endif
};

View File

@ -11,37 +11,26 @@ Pipeline::Pipeline() : staticPerPixel(false),gx(0),gy(0),blur1n(1), blur2n(1), b
blur1x(1), blur2x(1), blur3x(1),
blur1ed(1){}
float **alloc_mesh(size_t gx, size_t gy);
float **free_mesh(float **mesh);
void Pipeline::setStaticPerPixel(int gx, int gy)
{
staticPerPixel = true;
this->gx = gx;
this->gy = gy;
this->x_mesh= ( float ** ) wipemalloc ( gx * sizeof ( float * ) );
for ( int x = 0; x < gx; x++ )
{
this->x_mesh[x] = ( float * ) wipemalloc ( gy * sizeof ( float ) );
}
this->y_mesh= ( float ** ) wipemalloc ( gx * sizeof ( float * ) );
for ( int x = 0; x < gx; x++ )
{
this->y_mesh[x] = ( float * ) wipemalloc ( gy * sizeof ( float ) );
}
staticPerPixel = true;
this->gx = gx;
this->gy = gy;
this->x_mesh = alloc_mesh(gx, gy);
this->y_mesh = alloc_mesh(gx, gy);
}
Pipeline::~Pipeline()
{
if (staticPerPixel)
{
for ( int x = 0; x < this->gx; x++ )
if (staticPerPixel)
{
free(this->x_mesh[x]);
free(this->y_mesh[x]);
free_mesh(x_mesh);
free_mesh(y_mesh);
}
free(x_mesh);
free(y_mesh);
}
}
//void Pipeline::Render(const BeatDetect &music, const PipelineContext &context){}

View File

@ -25,20 +25,85 @@
*/
#include "wipemalloc.h"
#include <assert.h>
void *wipemalloc( size_t count ) {
void *wipemalloc( size_t count )
{
void *mem = malloc( count );
if ( mem != NULL ) {
if ( mem != NULL )
{
memset( mem, 0, count );
} else {
}
else
{
printf( "wipemalloc() failed to allocate %d bytes\n", (int)count );
}
}
return mem;
}
}
/** Safe memory deallocator */
void wipefree( void *ptr ) {
if ( ptr != NULL ) {
void wipefree( void *ptr )
{
if ( ptr != NULL )
free( ptr );
}
}
}
/**
 * Allocates zero-filled memory whose address is a multiple of `align`.
 *
 * The requested size is rounded up to a multiple of `align` before it is
 * handed to the underlying allocator, because aligned_alloc() requires that
 * and callers pass arbitrary byte counts. The whole rounded-up region is
 * zeroed. The result must be released with wipe_aligned_free().
 *
 * @param align required alignment in bytes; must be a power of two
 * @param size  number of bytes requested
 * @return the aligned block, or NULL (after printing a message) on failure
 */
void *wipe_aligned_alloc( size_t align, size_t size )
{
    // every path below, including the rounding mask, needs a power-of-two alignment
    assert( align != 0 && (align & (align-1)) == 0 );

    const size_t padded = (size + align - 1) & ~(align - 1);
    void *mem = NULL;

    if (padded >= size)     // false only when rounding up overflowed size_t
    {
#if HAVE_ALIGNED_ALLOC==1
        mem = aligned_alloc( align, padded );
#elif HAVE_POSIX_MEMALIGN==1
        if (posix_memalign(&mem, align, padded))
            mem = NULL;
#else
        // Over-allocate, align the pointer by hand, and stash the pointer
        // malloc() returned just below the aligned block so that
        // wipe_aligned_free() can recover it.
        void *allocated = malloc(padded + align - 1 + sizeof(void*));
        if (allocated)
        {
            mem = (void*) (((size_t)allocated + sizeof(void*) + align -1) & ~(align-1));
            ((void**)mem)[-1] = allocated;
        }
#endif
    }

    if (mem)
    {
        memset( mem, 0, padded );
    }
    else
    {
        printf( "wipe_aligned_alloc() failed to allocate %d bytes\n", (int)size );
    }
    return mem;
}
/**
 * Releases a block obtained from wipe_aligned_alloc(). NULL is ignored.
 *
 * When the platform allocator produced the block it goes straight to free();
 * otherwise the pointer originally returned by malloc() is read back from
 * the slot just below the block and that is what gets freed.
 */
void wipe_aligned_free( void *p )
{
    if (p == NULL)
        return;
#if HAVE_ALIGNED_ALLOC==1 || HAVE_POSIX_MEMALIGN==1
    free(p);
#else
    void **slot = (void**)p;
    free(slot[-1]);
#endif
}

View File

@ -57,4 +57,8 @@
void *wipemalloc( size_t count );
void wipefree( void *ptr );
/** Memory from wipe_aligned_alloc() must be released with wipe_aligned_free() */
/** Allocates `count` zero-filled bytes at an address aligned to `align` bytes; returns NULL on failure */
void *wipe_aligned_alloc( size_t align, size_t count);
/** Convenience overload: same as wipe_aligned_alloc(16, count) */
inline void *wipe_aligned_alloc( size_t count ) { return wipe_aligned_alloc(16,count); }
/** Releases a block returned by wipe_aligned_alloc(); NULL is ignored */
void wipe_aligned_free( void *ptr );
#endif /** !_WIPEMALLOC_H */

View File

@ -96,13 +96,11 @@ int projectMSDL::openAudioInput() {
void projectMSDL::beginAudioCapture() {
// allocate a buffer to store PCM data for feeding in
unsigned int maxSamples = audioChannelsCount * audioSampleCount;
pcmBuffer = (unsigned char *) malloc(maxSamples);
SDL_PauseAudioDevice(audioDeviceID, false);
pcm()->initPCM(2048);
}
void projectMSDL::endAudioCapture() {
free(pcmBuffer);
SDL_PauseAudioDevice(audioDeviceID, true);
}
@ -238,3 +236,9 @@ void projectMSDL::init(SDL_Window *window, SDL_Renderer *renderer) {
selectRandom(true);
projectM_resetGL(width, height);
}
// NOTE(review): currently returns the fixed string "hey" rather than anything
// derived from the active preset — looks like a placeholder; confirm intent.
std::string projectMSDL::getActivePresetName()
{
    std::string presetName("hey");
    return presetName;
}

View File

@ -44,6 +44,7 @@ public:
void renderFrame();
void pollEvent();
void maximize();
std::string getActivePresetName();
private:
SDL_Window *win;
@ -59,12 +60,10 @@ private:
unsigned short audioSampleCount;
SDL_AudioFormat audioFormat;
SDL_AudioDeviceID audioDeviceID;
unsigned char *pcmBuffer; // pre-allocated buffer for audioInputCallback
static void audioInputCallbackF32(void *userdata, unsigned char *stream, int len);
static void audioInputCallbackS16(void *userdata, unsigned char *stream, int len);
void addFakePCM();
void keyHandler(SDL_Event *);
SDL_AudioDeviceID selectAudioInput(int count);