Рефакторинг и оптимизация вычислений / Refactoring and optimization of computations

This commit is contained in:
Andrey Pokidov 2024-11-26 02:25:04 +07:00
parent 03e390c1d0
commit 2655e43cb4
15 changed files with 810 additions and 829 deletions

View file

@@ -58,45 +58,9 @@ static inline void bg_fp64_versor_reset(BgFP64Versor* versor)
// ==================== Set ===================== //
void __bg_fp32_versor_normalize(const float square_modulus, __BgFP32DarkTwinVersor* twin);
void bg_fp32_versor_set_values(const float s0, const float x1, const float x2, const float x3, BgFP32Versor* versor);
void __bg_fp64_versor_normalize(const double square_modulus, __BgFP64DarkTwinVersor* twin);
// Writes the four given components into the versor and, when their squared
// modulus lies outside [1 - BG_FP32_TWO_EPSYLON, 1 + BG_FP32_TWO_EPSYLON],
// passes the stored values to __bg_fp32_versor_normalize.
static inline void bg_fp32_versor_set_values(const float s0, const float x1, const float x2, const float x3, BgFP32Versor* versor)
{
    __BgFP32DarkTwinVersor* const writable = (__BgFP32DarkTwinVersor*)versor;

    writable->s0 = s0;
    writable->x1 = x1;
    writable->x2 = x2;
    writable->x3 = x3;

    const float norm2 = (s0 * s0 + x1 * x1) + (x2 * x2 + x3 * x3);
    const int within_tolerance = (1.0f - BG_FP32_TWO_EPSYLON <= norm2 && norm2 <= 1.0f + BG_FP32_TWO_EPSYLON);

    // The range test is negated as a whole (not by flipping the comparisons)
    // so that a NaN modulus still goes down the normalize path.
    if (!within_tolerance) {
        __bg_fp32_versor_normalize(norm2, writable);
    }
}
// Writes the four given components into the versor and, when their squared
// modulus lies outside [1 - BG_FP64_TWO_EPSYLON, 1 + BG_FP64_TWO_EPSYLON],
// passes the stored values to __bg_fp64_versor_normalize.
static inline void bg_fp64_versor_set_values(const double s0, const double x1, const double x2, const double x3, BgFP64Versor* versor)
{
    __BgFP64DarkTwinVersor* const writable = (__BgFP64DarkTwinVersor*)versor;

    writable->s0 = s0;
    writable->x1 = x1;
    writable->x2 = x2;
    writable->x3 = x3;

    const double norm2 = (s0 * s0 + x1 * x1) + (x2 * x2 + x3 * x3);
    const int within_tolerance = (1.0 - BG_FP64_TWO_EPSYLON <= norm2 && norm2 <= 1.0 + BG_FP64_TWO_EPSYLON);

    // The range test is negated as a whole (not by flipping the comparisons)
    // so that a NaN modulus still goes down the normalize path.
    if (!within_tolerance) {
        __bg_fp64_versor_normalize(norm2, writable);
    }
}
void bg_fp64_versor_set_values(const double s0, const double x1, const double x2, const double x3, BgFP64Versor* versor);
// ==================== Copy ==================== //
@@ -152,24 +116,24 @@ static inline void bg_fp64_versor_set_rotation(const BgFP64Rotation3* rotation,
// =============== Square modulus =============== //
static inline int bg_fp32_versor_get_square_modulus(const BgFP32Versor* versor)
static inline float bg_fp32_versor_get_square_modulus(const BgFP32Versor* versor)
{
return (versor->s0 * versor->s0 + versor->x1 * versor->x1) + (versor->x2 * versor->x2 + versor->x3 * versor->x3);
}
static inline int bg_fp64_versor_get_square_modulus(const BgFP64Versor* versor)
static inline double bg_fp64_versor_get_square_modulus(const BgFP64Versor* versor)
{
return (versor->s0 * versor->s0 + versor->x1 * versor->x1) + (versor->x2 * versor->x2 + versor->x3 * versor->x3);
}
// =================== Modulus ================== //
// Returns the square root of bg_fp32_versor_get_square_modulus(versor).
// The return type is float so the sqrtf result is not truncated to an integer.
static inline float bg_fp32_versor_get_modulus(const BgFP32Versor* versor)
{
    return sqrtf(bg_fp32_versor_get_square_modulus(versor));
}
// Returns the square root of bg_fp64_versor_get_square_modulus(versor).
// The return type is double so the sqrt result is not truncated to an integer.
static inline double bg_fp64_versor_get_modulus(const BgFP64Versor* versor)
{
    return sqrt(bg_fp64_versor_get_square_modulus(versor));
}
@@ -274,51 +238,9 @@ static inline void bg_fp64_versor_set_inverted_fp32(const BgFP32Versor* versor,
// ================ Combination ================= //
// Computes the quaternion product second * first into `result`.
// All four components are evaluated into locals before anything is written,
// so no input component is read after `result` has been modified.
// If the squared modulus of the product is outside
// [1 - BG_FP32_TWO_EPSYLON, 1 + BG_FP32_TWO_EPSYLON], the stored values are
// passed to __bg_fp32_versor_normalize.
static inline void bg_fp32_versor_combine(const BgFP32Versor* second, const BgFP32Versor* first, BgFP32Versor* result)
{
// Scalar part: s*s minus the dot product of the vector parts.
const float s0 = (second->s0 * first->s0 - second->x1 * first->x1) - (second->x2 * first->x2 + second->x3 * first->x3);
// Vector part: cross terms with the scalars plus the cross product second x first.
const float x1 = (second->x1 * first->s0 + second->s0 * first->x1) - (second->x3 * first->x2 - second->x2 * first->x3);
const float x2 = (second->x2 * first->s0 + second->s0 * first->x2) - (second->x1 * first->x3 - second->x3 * first->x1);
const float x3 = (second->x3 * first->s0 + second->s0 * first->x3) - (second->x2 * first->x1 - second->x1 * first->x2);
// NOTE(review): the next line re-declares this very function inside its own body.
// It looks like the added prototype from the diff view interleaved with the removed
// definition — confirm which of the two is meant to remain.
void bg_fp32_versor_combine(const BgFP32Versor* second, const BgFP32Versor* first, BgFP32Versor* result);
const float square_modulus = (s0 * s0 + x1 * x1) + (x2 * x2 + x3 * x3);
// The cast yields a pointer through which the components can be assigned.
__BgFP32DarkTwinVersor* twin = (__BgFP32DarkTwinVersor*)result;
twin->s0 = s0;
twin->x1 = x1;
twin->x2 = x2;
twin->x3 = x3;
// Skip normalization when the product is already unit length within tolerance.
if (1.0f - BG_FP32_TWO_EPSYLON <= square_modulus && square_modulus <= 1.0f + BG_FP32_TWO_EPSYLON) {
return;
}
__bg_fp32_versor_normalize(square_modulus, twin);
}
// Computes the quaternion product second * first into `result`.
// The product is fully evaluated into locals before `result` is touched.
// When its squared modulus is outside
// [1 - BG_FP64_TWO_EPSYLON, 1 + BG_FP64_TWO_EPSYLON], the stored values are
// handed to __bg_fp64_versor_normalize.
static inline void bg_fp64_versor_combine(const BgFP64Versor* second, const BgFP64Versor* first, BgFP64Versor* result)
{
    const BgFP64Versor* const b = second;
    const BgFP64Versor* const a = first;

    // Scalar part, then the three vector components (cross terms plus b x a).
    const double prod_s0 = (b->s0 * a->s0 - b->x1 * a->x1) - (b->x2 * a->x2 + b->x3 * a->x3);
    const double prod_x1 = (b->x1 * a->s0 + b->s0 * a->x1) - (b->x3 * a->x2 - b->x2 * a->x3);
    const double prod_x2 = (b->x2 * a->s0 + b->s0 * a->x2) - (b->x1 * a->x3 - b->x3 * a->x1);
    const double prod_x3 = (b->x3 * a->s0 + b->s0 * a->x3) - (b->x2 * a->x1 - b->x1 * a->x2);

    const double norm2 = (prod_s0 * prod_s0 + prod_x1 * prod_x1) + (prod_x2 * prod_x2 + prod_x3 * prod_x3);

    __BgFP64DarkTwinVersor* const writable = (__BgFP64DarkTwinVersor*)result;
    writable->s0 = prod_s0;
    writable->x1 = prod_x1;
    writable->x2 = prod_x2;
    writable->x3 = prod_x3;

    const int within_tolerance = (1.0 - BG_FP64_TWO_EPSYLON <= norm2 && norm2 <= 1.0 + BG_FP64_TWO_EPSYLON);

    // Negating the whole range test keeps a NaN modulus on the normalize path.
    if (!within_tolerance) {
        __bg_fp64_versor_normalize(norm2, writable);
    }
}
void bg_fp64_versor_combine(const BgFP64Versor* second, const BgFP64Versor* first, BgFP64Versor* result);
// ================= Rotation3 ================== //
@@ -328,330 +250,26 @@ void bg_fp64_versor_get_rotation(const BgFP64Versor* versor, BgFP64Rotation3* re
// =========== Make Rotation Matrix3x3 ========== //
// Fills all nine cells of `matrix` from the versor's components:
// the diagonal from the squared components, the off-diagonal cells from the
// doubled pairwise products.
static inline void bg_fp32_versor_get_rotation_matrix(const BgFP32Versor* versor, BgFP32Matrix3x3* matrix)
{
// Squares of the four components.
const float s0s0 = versor->s0 * versor->s0;
const float x1x1 = versor->x1 * versor->x1;
const float x2x2 = versor->x2 * versor->x2;
const float x3x3 = versor->x3 * versor->x3;
// NOTE(review): the next line re-declares this very function inside its own body.
// It looks like the added prototype from the diff view interleaved with the removed
// definition — confirm which of the two is meant to remain.
void bg_fp32_versor_get_rotation_matrix(const BgFP32Versor* versor, BgFP32Matrix3x3* matrix);
// Doubled pairwise products (the names omit the factor of two).
const float s0x1 = 2.0f * versor->s0 * versor->x1;
const float s0x2 = 2.0f * versor->s0 * versor->x2;
const float s0x3 = 2.0f * versor->s0 * versor->x3;
const float x1x2 = 2.0f * versor->x1 * versor->x2;
const float x1x3 = 2.0f * versor->x1 * versor->x3;
const float x2x3 = 2.0f * versor->x2 * versor->x3;
// Diagonal.
matrix->r1c1 = (s0s0 + x1x1) - (x2x2 + x3x3);
matrix->r2c2 = (s0s0 + x2x2) - (x1x1 + x3x3);
matrix->r3c3 = (s0s0 + x3x3) - (x1x1 + x2x2);
// Off-diagonal cells that subtract the scalar-times-vector term.
matrix->r1c2 = x1x2 - s0x3;
matrix->r2c3 = x2x3 - s0x1;
matrix->r3c1 = x1x3 - s0x2;
// Their mirror cells add the same term.
matrix->r2c1 = x1x2 + s0x3;
matrix->r3c2 = x2x3 + s0x1;
matrix->r1c3 = x1x3 + s0x2;
}
// Fills all nine cells of `matrix` from the versor's components:
// the diagonal from the squared components, the off-diagonal cells from the
// doubled pairwise products.
static inline void bg_fp64_versor_get_rotation_matrix(const BgFP64Versor* versor, BgFP64Matrix3x3* matrix)
{
    const BgFP64Versor* const q = versor;

    // Doubled pairwise products.
    const double dbl_s0x1 = 2.0 * q->s0 * q->x1;
    const double dbl_s0x2 = 2.0 * q->s0 * q->x2;
    const double dbl_s0x3 = 2.0 * q->s0 * q->x3;
    const double dbl_x1x2 = 2.0 * q->x1 * q->x2;
    const double dbl_x1x3 = 2.0 * q->x1 * q->x3;
    const double dbl_x2x3 = 2.0 * q->x2 * q->x3;

    // Squares of the four components.
    const double sq_s0 = q->s0 * q->s0;
    const double sq_x1 = q->x1 * q->x1;
    const double sq_x2 = q->x2 * q->x2;
    const double sq_x3 = q->x3 * q->x3;

    // Row 1
    matrix->r1c1 = (sq_s0 + sq_x1) - (sq_x2 + sq_x3);
    matrix->r1c2 = dbl_x1x2 - dbl_s0x3;
    matrix->r1c3 = dbl_x1x3 + dbl_s0x2;

    // Row 2
    matrix->r2c1 = dbl_x1x2 + dbl_s0x3;
    matrix->r2c2 = (sq_s0 + sq_x2) - (sq_x1 + sq_x3);
    matrix->r2c3 = dbl_x2x3 - dbl_s0x1;

    // Row 3
    matrix->r3c1 = dbl_x1x3 - dbl_s0x2;
    matrix->r3c2 = dbl_x2x3 + dbl_s0x1;
    matrix->r3c3 = (sq_s0 + sq_x3) - (sq_x1 + sq_x2);
}
void bg_fp64_versor_get_rotation_matrix(const BgFP64Versor* versor, BgFP64Matrix3x3* matrix);
// =========== Make Reverse Matrix3x3 =========== //
// Fills all nine cells of `matrix` from the versor's components.
// The diagonal matches bg_fp32_versor_get_rotation_matrix; the signs of the
// scalar-times-vector terms in the off-diagonal cells are flipped, which makes
// this the transpose of that matrix.
static inline void bg_fp32_versor_get_reverse_matrix(const BgFP32Versor* versor, BgFP32Matrix3x3* matrix)
{
// Squares of the four components.
const float s0s0 = versor->s0 * versor->s0;
const float x1x1 = versor->x1 * versor->x1;
const float x2x2 = versor->x2 * versor->x2;
const float x3x3 = versor->x3 * versor->x3;
// NOTE(review): the next line re-declares this very function inside its own body.
// It looks like the added prototype from the diff view interleaved with the removed
// definition — confirm which of the two is meant to remain.
void bg_fp32_versor_get_reverse_matrix(const BgFP32Versor* versor, BgFP32Matrix3x3* matrix);
// Doubled pairwise products (the names omit the factor of two).
const float s0x1 = 2.0f * versor->s0 * versor->x1;
const float s0x2 = 2.0f * versor->s0 * versor->x2;
const float s0x3 = 2.0f * versor->s0 * versor->x3;
const float x1x2 = 2.0f * versor->x1 * versor->x2;
const float x1x3 = 2.0f * versor->x1 * versor->x3;
const float x2x3 = 2.0f * versor->x2 * versor->x3;
// Diagonal.
matrix->r1c1 = (s0s0 + x1x1) - (x2x2 + x3x3);
matrix->r2c2 = (s0s0 + x2x2) - (x1x1 + x3x3);
matrix->r3c3 = (s0s0 + x3x3) - (x1x1 + x2x2);
// Off-diagonal cells that add the scalar-times-vector term.
matrix->r1c2 = x1x2 + s0x3;
matrix->r2c3 = x2x3 + s0x1;
matrix->r3c1 = x1x3 + s0x2;
// Their mirror cells subtract the same term.
matrix->r2c1 = x1x2 - s0x3;
matrix->r3c2 = x2x3 - s0x1;
matrix->r1c3 = x1x3 - s0x2;
}
// Fills all nine cells of `matrix` from the versor's components.
// The diagonal matches bg_fp64_versor_get_rotation_matrix; the signs of the
// scalar-times-vector terms in the off-diagonal cells are flipped, which makes
// this the transpose of that matrix.
static inline void bg_fp64_versor_get_reverse_matrix(const BgFP64Versor* versor, BgFP64Matrix3x3* matrix)
{
    const BgFP64Versor* const q = versor;

    // Doubled pairwise products.
    const double dbl_s0x1 = 2.0 * q->s0 * q->x1;
    const double dbl_s0x2 = 2.0 * q->s0 * q->x2;
    const double dbl_s0x3 = 2.0 * q->s0 * q->x3;
    const double dbl_x1x2 = 2.0 * q->x1 * q->x2;
    const double dbl_x1x3 = 2.0 * q->x1 * q->x3;
    const double dbl_x2x3 = 2.0 * q->x2 * q->x3;

    // Squares of the four components.
    const double sq_s0 = q->s0 * q->s0;
    const double sq_x1 = q->x1 * q->x1;
    const double sq_x2 = q->x2 * q->x2;
    const double sq_x3 = q->x3 * q->x3;

    // Row 1
    matrix->r1c1 = (sq_s0 + sq_x1) - (sq_x2 + sq_x3);
    matrix->r1c2 = dbl_x1x2 + dbl_s0x3;
    matrix->r1c3 = dbl_x1x3 - dbl_s0x2;

    // Row 2
    matrix->r2c1 = dbl_x1x2 - dbl_s0x3;
    matrix->r2c2 = (sq_s0 + sq_x2) - (sq_x1 + sq_x3);
    matrix->r2c3 = dbl_x2x3 + dbl_s0x1;

    // Row 3
    matrix->r3c1 = dbl_x1x3 + dbl_s0x2;
    matrix->r3c2 = dbl_x2x3 - dbl_s0x1;
    matrix->r3c3 = (sq_s0 + sq_x3) - (sq_x1 + sq_x2);
}
void bg_fp64_versor_get_reverse_matrix(const BgFP64Versor* versor, BgFP64Matrix3x3* matrix);
// ================ Turn Vector ================= //
// Turns `vector` by the versor and stores the outcome in `result`.
// With q = (x1, x2, x3) and t = 2 * (q x vector), the result is
// vector + s0 * t + q x t. The three outputs are computed into locals before
// the first store to `result`.
static inline void bg_fp32_versor_turn(const BgFP32Versor* versor, const BgFP32Vector3* vector, BgFP32Vector3* result)
{
// t = 2 * (q x vector)
const float tx1 = 2.0f * (versor->x2 * vector->x3 - versor->x3 * vector->x2);
const float tx2 = 2.0f * (versor->x3 * vector->x1 - versor->x1 * vector->x3);
const float tx3 = 2.0f * (versor->x1 * vector->x2 - versor->x2 * vector->x1);
// NOTE(review): the next line re-declares this very function inside its own body.
// It looks like the added prototype from the diff view interleaved with the removed
// definition — confirm which of the two is meant to remain.
void bg_fp32_versor_turn(const BgFP32Versor* versor, const BgFP32Vector3* vector, BgFP32Vector3* result);
// vector + s0 * t + q x t
const float x1 = (vector->x1 + tx1 * versor->s0) + (versor->x2 * tx3 - versor->x3 * tx2);
const float x2 = (vector->x2 + tx2 * versor->s0) + (versor->x3 * tx1 - versor->x1 * tx3);
const float x3 = (vector->x3 + tx3 * versor->s0) + (versor->x1 * tx2 - versor->x2 * tx1);
result->x1 = x1;
result->x2 = x2;
result->x3 = x3;
}
// Turns `vector` by the versor and stores the outcome in `result`.
// With q = (x1, x2, x3) and t = 2 * (q x vector), the result is
// vector + s0 * t + q x t.
static inline void bg_fp64_versor_turn(const BgFP64Versor* versor, const BgFP64Vector3* vector, BgFP64Vector3* result)
{
    const BgFP64Versor* const q = versor;
    const BgFP64Vector3* const v = vector;

    // t = 2 * (q x v)
    const double t1 = 2.0 * (q->x2 * v->x3 - q->x3 * v->x2);
    const double t2 = 2.0 * (q->x3 * v->x1 - q->x1 * v->x3);
    const double t3 = 2.0 * (q->x1 * v->x2 - q->x2 * v->x1);

    // v + s0 * t + q x t, fully evaluated before the first store to `result`.
    const double out1 = (v->x1 + t1 * q->s0) + (q->x2 * t3 - q->x3 * t2);
    const double out2 = (v->x2 + t2 * q->s0) + (q->x3 * t1 - q->x1 * t3);
    const double out3 = (v->x3 + t3 * q->s0) + (q->x1 * t2 - q->x2 * t1);

    result->x1 = out1;
    result->x2 = out2;
    result->x3 = out3;
}
// ================ Turn2 Vector ================ //
// Turns `vector` by the versor via an explicit 3x3 matrix: the nine
// coefficients are built from the versor's components and then applied to
// `vector` as a matrix-vector product. The outputs are computed into locals
// before the first store to `result`.
static inline void bg_fp32_versor_turn2(const BgFP32Versor* versor, const BgFP32Vector3* vector, BgFP32Vector3* result)
{
    const BgFP32Versor* const q = versor;
    const BgFP32Vector3* const v = vector;

    // Squares of the four components.
    const float sq_s0 = q->s0 * q->s0;
    const float sq_x1 = q->x1 * q->x1;
    const float sq_x2 = q->x2 * q->x2;
    const float sq_x3 = q->x3 * q->x3;

    // Doubled pairwise products.
    const float dbl_s0x1 = 2.0f * q->s0 * q->x1;
    const float dbl_s0x2 = 2.0f * q->s0 * q->x2;
    const float dbl_s0x3 = 2.0f * q->s0 * q->x3;
    const float dbl_x1x2 = 2.0f * q->x1 * q->x2;
    const float dbl_x1x3 = 2.0f * q->x1 * q->x3;
    const float dbl_x2x3 = 2.0f * q->x2 * q->x3;

    // Row 1
    const float m11 = (sq_s0 + sq_x1) - (sq_x2 + sq_x3);
    const float m12 = dbl_x1x2 - dbl_s0x3;
    const float m13 = dbl_x1x3 + dbl_s0x2;

    // Row 2
    const float m21 = dbl_x1x2 + dbl_s0x3;
    const float m22 = (sq_s0 + sq_x2) - (sq_x1 + sq_x3);
    const float m23 = dbl_x2x3 - dbl_s0x1;

    // Row 3
    const float m31 = dbl_x1x3 - dbl_s0x2;
    const float m32 = dbl_x2x3 + dbl_s0x1;
    const float m33 = (sq_s0 + sq_x3) - (sq_x1 + sq_x2);

    const float out1 = m11 * v->x1 + m12 * v->x2 + m13 * v->x3;
    const float out2 = m21 * v->x1 + m22 * v->x2 + m23 * v->x3;
    const float out3 = m31 * v->x1 + m32 * v->x2 + m33 * v->x3;

    result->x1 = out1;
    result->x2 = out2;
    result->x3 = out3;
}
// Turns `vector` by the versor via an explicit 3x3 matrix: the nine
// coefficients are built from the versor's components and then applied to
// `vector` as a matrix-vector product. The outputs are computed into locals
// before the first store to `result`.
static inline void bg_fp64_versor_turn2(const BgFP64Versor* versor, const BgFP64Vector3* vector, BgFP64Vector3* result)
{
    // Squares of the four components.
    const double s0s0 = versor->s0 * versor->s0;
    const double x1x1 = versor->x1 * versor->x1;
    const double x2x2 = versor->x2 * versor->x2;
    const double x3x3 = versor->x3 * versor->x3;

    // Doubled pairwise products. Double literals are used so the fp64 code
    // does not mix in float constants (the value 2 is the same either way).
    const double s0x1 = 2.0 * versor->s0 * versor->x1;
    const double s0x2 = 2.0 * versor->s0 * versor->x2;
    const double s0x3 = 2.0 * versor->s0 * versor->x3;
    const double x1x2 = 2.0 * versor->x1 * versor->x2;
    const double x1x3 = 2.0 * versor->x1 * versor->x3;
    const double x2x3 = 2.0 * versor->x2 * versor->x3;

    // Off-diagonal cells that add the scalar-times-vector term.
    const double r2c1 = x1x2 + s0x3;
    const double r3c2 = x2x3 + s0x1;
    const double r1c3 = x1x3 + s0x2;

    // Diagonal.
    const double r1c1 = (s0s0 + x1x1) - (x2x2 + x3x3);
    const double r2c2 = (s0s0 + x2x2) - (x1x1 + x3x3);
    const double r3c3 = (s0s0 + x3x3) - (x1x1 + x2x2);

    // Off-diagonal cells that subtract the scalar-times-vector term.
    const double r1c2 = x1x2 - s0x3;
    const double r2c3 = x2x3 - s0x1;
    const double r3c1 = x1x3 - s0x2;

    // Matrix-vector product.
    const double x1 = r1c1 * vector->x1 + r1c2 * vector->x2 + r1c3 * vector->x3;
    const double x2 = r2c1 * vector->x1 + r2c2 * vector->x2 + r2c3 * vector->x3;
    const double x3 = r3c1 * vector->x1 + r3c2 * vector->x2 + r3c3 * vector->x3;

    result->x1 = x1;
    result->x2 = x2;
    result->x3 = x3;
}
void bg_fp64_versor_turn(const BgFP64Versor* versor, const BgFP64Vector3* vector, BgFP64Vector3* result);
// ============== Turn Vector Back ============== //
// Applies the opposite turn to `vector` and stores the outcome in `result`.
// With q = (x1, x2, x3) and t = 2 * (q x vector), the result is
// vector - s0 * t + q x t; only the sign of the s0 term differs from
// bg_fp32_versor_turn. The three outputs are computed into locals before the
// first store to `result`.
static inline void bg_fp32_versor_turn_back(const BgFP32Versor* versor, const BgFP32Vector3* vector, BgFP32Vector3* result)
{
// t = 2 * (q x vector)
const float tx1 = 2.0f * (versor->x2 * vector->x3 - versor->x3 * vector->x2);
const float tx2 = 2.0f * (versor->x3 * vector->x1 - versor->x1 * vector->x3);
const float tx3 = 2.0f * (versor->x1 * vector->x2 - versor->x2 * vector->x1);
// NOTE(review): the next line re-declares this very function inside its own body.
// It looks like the added prototype from the diff view interleaved with the removed
// definition — confirm which of the two is meant to remain.
void bg_fp32_versor_turn_back(const BgFP32Versor* versor, const BgFP32Vector3* vector, BgFP32Vector3* result);
// vector - s0 * t + q x t
const float x1 = (vector->x1 - tx1 * versor->s0) + (versor->x2 * tx3 - versor->x3 * tx2);
const float x2 = (vector->x2 - tx2 * versor->s0) + (versor->x3 * tx1 - versor->x1 * tx3);
const float x3 = (vector->x3 - tx3 * versor->s0) + (versor->x1 * tx2 - versor->x2 * tx1);
result->x1 = x1;
result->x2 = x2;
result->x3 = x3;
}
// Applies the opposite turn to `vector` and stores the outcome in `result`.
// With q = (x1, x2, x3) and t = 2 * (q x vector), the result is
// vector - s0 * t + q x t; only the sign of the s0 term differs from
// bg_fp64_versor_turn.
static inline void bg_fp64_versor_turn_back(const BgFP64Versor* versor, const BgFP64Vector3* vector, BgFP64Vector3* result)
{
    const BgFP64Versor* const q = versor;
    const BgFP64Vector3* const v = vector;

    // t = 2 * (q x v)
    const double t1 = 2.0 * (q->x2 * v->x3 - q->x3 * v->x2);
    const double t2 = 2.0 * (q->x3 * v->x1 - q->x1 * v->x3);
    const double t3 = 2.0 * (q->x1 * v->x2 - q->x2 * v->x1);

    // v - s0 * t + q x t, fully evaluated before the first store to `result`.
    const double out1 = (v->x1 - t1 * q->s0) + (q->x2 * t3 - q->x3 * t2);
    const double out2 = (v->x2 - t2 * q->s0) + (q->x3 * t1 - q->x1 * t3);
    const double out3 = (v->x3 - t3 * q->s0) + (q->x1 * t2 - q->x2 * t1);

    result->x1 = out1;
    result->x2 = out2;
    result->x3 = out3;
}
// ============== Turn Vector Back2 ============= //
// Applies the opposite turn to `vector` via an explicit 3x3 matrix whose
// off-diagonal scalar-times-vector terms have the opposite sign to those in
// bg_fp32_versor_turn2 (i.e. the transposed matrix). The outputs are computed
// into locals before the first store to `result`.
static inline void bg_fp32_versor_turn_back2(const BgFP32Versor* versor, const BgFP32Vector3* vector, BgFP32Vector3* result)
{
    const BgFP32Versor* const q = versor;
    const BgFP32Vector3* const v = vector;

    // Squares of the four components.
    const float sq_s0 = q->s0 * q->s0;
    const float sq_x1 = q->x1 * q->x1;
    const float sq_x2 = q->x2 * q->x2;
    const float sq_x3 = q->x3 * q->x3;

    // Doubled pairwise products.
    const float dbl_s0x1 = 2.0f * q->s0 * q->x1;
    const float dbl_s0x2 = 2.0f * q->s0 * q->x2;
    const float dbl_s0x3 = 2.0f * q->s0 * q->x3;
    const float dbl_x1x2 = 2.0f * q->x1 * q->x2;
    const float dbl_x1x3 = 2.0f * q->x1 * q->x3;
    const float dbl_x2x3 = 2.0f * q->x2 * q->x3;

    // Row 1
    const float m11 = (sq_s0 + sq_x1) - (sq_x2 + sq_x3);
    const float m12 = dbl_x1x2 + dbl_s0x3;
    const float m13 = dbl_x1x3 - dbl_s0x2;

    // Row 2
    const float m21 = dbl_x1x2 - dbl_s0x3;
    const float m22 = (sq_s0 + sq_x2) - (sq_x1 + sq_x3);
    const float m23 = dbl_x2x3 + dbl_s0x1;

    // Row 3
    const float m31 = dbl_x1x3 + dbl_s0x2;
    const float m32 = dbl_x2x3 - dbl_s0x1;
    const float m33 = (sq_s0 + sq_x3) - (sq_x1 + sq_x2);

    const float out1 = m11 * v->x1 + m12 * v->x2 + m13 * v->x3;
    const float out2 = m21 * v->x1 + m22 * v->x2 + m23 * v->x3;
    const float out3 = m31 * v->x1 + m32 * v->x2 + m33 * v->x3;

    result->x1 = out1;
    result->x2 = out2;
    result->x3 = out3;
}
// Applies the opposite turn to `vector` via an explicit 3x3 matrix whose
// off-diagonal scalar-times-vector terms have the opposite sign to those in
// bg_fp64_versor_turn2 (i.e. the transposed matrix). The outputs are computed
// into locals before the first store to `result`.
static inline void bg_fp64_versor_turn_back2(const BgFP64Versor* versor, const BgFP64Vector3* vector, BgFP64Vector3* result)
{
    // Squares of the four components.
    const double s0s0 = versor->s0 * versor->s0;
    const double x1x1 = versor->x1 * versor->x1;
    const double x2x2 = versor->x2 * versor->x2;
    const double x3x3 = versor->x3 * versor->x3;

    // Doubled pairwise products. Double literals are used so the fp64 code
    // does not mix in float constants (the value 2 is the same either way).
    const double s0x1 = 2.0 * versor->s0 * versor->x1;
    const double s0x2 = 2.0 * versor->s0 * versor->x2;
    const double s0x3 = 2.0 * versor->s0 * versor->x3;
    const double x1x2 = 2.0 * versor->x1 * versor->x2;
    const double x1x3 = 2.0 * versor->x1 * versor->x3;
    const double x2x3 = 2.0 * versor->x2 * versor->x3;

    // Off-diagonal cells that add the scalar-times-vector term.
    const double r1c2 = x1x2 + s0x3;
    const double r2c3 = x2x3 + s0x1;
    const double r3c1 = x1x3 + s0x2;

    // Diagonal.
    const double r1c1 = (s0s0 + x1x1) - (x2x2 + x3x3);
    const double r2c2 = (s0s0 + x2x2) - (x1x1 + x3x3);
    const double r3c3 = (s0s0 + x3x3) - (x1x1 + x2x2);

    // Off-diagonal cells that subtract the scalar-times-vector term.
    const double r2c1 = x1x2 - s0x3;
    const double r3c2 = x2x3 - s0x1;
    const double r1c3 = x1x3 - s0x2;

    // Matrix-vector product.
    const double x1 = r1c1 * vector->x1 + r1c2 * vector->x2 + r1c3 * vector->x3;
    const double x2 = r2c1 * vector->x1 + r2c2 * vector->x2 + r2c3 * vector->x3;
    const double x3 = r3c1 * vector->x1 + r3c2 * vector->x2 + r3c3 * vector->x3;

    result->x1 = x1;
    result->x2 = x2;
    result->x3 = x3;
}
void bg_fp64_versor_turn_back(const BgFP64Versor* versor, const BgFP64Vector3* vector, BgFP64Vector3* result);
#endif