Оптимизация под GNU C компилятор / Optimization for GNU C Compiler

This commit is contained in:
Andrey Pokidov 2024-11-26 13:20:12 +07:00
parent e4d75824f3
commit 081f794eb1
10 changed files with 671 additions and 755 deletions

View file

@ -1,105 +1 @@
#include "matrix2x2.h" #include "matrix2x2.h"
// ================= Inversion ================== //
// Inverts a single-precision 2x2 matrix in place.
//
// Returns 0 and leaves the matrix untouched when its determinant lies within
// [-BG_FP32_EPSYLON, BG_FP32_EPSYLON]; otherwise overwrites the matrix with
// its inverse and returns 1.
int bg_fp32_matrix2x2_invert(BgFP32Matrix2x2* matrix)
{
    const float det = bg_fp32_matrix2x2_get_determinant(matrix);
    const int is_singular = (-BG_FP32_EPSYLON <= det) && (det <= BG_FP32_EPSYLON);

    if (is_singular) {
        return 0;
    }

    const float scale = 1.0f / det;

    // Snapshot every element first: each output cell depends on a different input cell.
    const float a = matrix->r1c1;
    const float b = matrix->r1c2;
    const float c = matrix->r2c1;
    const float d = matrix->r2c2;

    // Swap the diagonal, negate the off-diagonal, then scale by 1 / determinant.
    matrix->r1c1 = d * scale;
    matrix->r1c2 = -b * scale;
    matrix->r2c1 = -c * scale;
    matrix->r2c2 = a * scale;

    return 1;
}
// Inverts a double-precision 2x2 matrix in place.
//
// Returns 0 and leaves the matrix untouched when its determinant lies within
// [-BG_FP64_EPSYLON, BG_FP64_EPSYLON]; otherwise overwrites the matrix with
// its inverse and returns 1.
int bg_fp64_matrix2x2_invert(BgFP64Matrix2x2* matrix)
{
    const double det = bg_fp64_matrix2x2_get_determinant(matrix);
    const int is_singular = (-BG_FP64_EPSYLON <= det) && (det <= BG_FP64_EPSYLON);

    if (is_singular) {
        return 0;
    }

    const double scale = 1.0 / det;

    // Snapshot every element first: each output cell depends on a different input cell.
    const double a = matrix->r1c1;
    const double b = matrix->r1c2;
    const double c = matrix->r2c1;
    const double d = matrix->r2c2;

    // Swap the diagonal, negate the off-diagonal, then scale by 1 / determinant.
    matrix->r1c1 = d * scale;
    matrix->r1c2 = -b * scale;
    matrix->r2c1 = -c * scale;
    matrix->r2c2 = a * scale;

    return 1;
}
// ================ Set Inverted ================ //
// Writes the inverse of the single-precision matrix `from` into `to`.
//
// Returns 0 without touching `to` when the determinant of `from` lies within
// [-BG_FP32_EPSYLON, BG_FP32_EPSYLON]; otherwise fills `to` and returns 1.
// All elements of `from` are read before `to` is written, so passing the same
// matrix for both arguments works.
int bg_fp32_matrix2x2_set_inverted(const BgFP32Matrix2x2* from, BgFP32Matrix2x2* to)
{
    const float det = bg_fp32_matrix2x2_get_determinant(from);
    const int is_singular = (-BG_FP32_EPSYLON <= det) && (det <= BG_FP32_EPSYLON);

    if (is_singular) {
        return 0;
    }

    const float scale = 1.0f / det;

    const float a = from->r1c1;
    const float b = from->r1c2;
    const float c = from->r2c1;
    const float d = from->r2c2;

    // Swap the diagonal, negate the off-diagonal, then scale by 1 / determinant.
    to->r1c1 = d * scale;
    to->r1c2 = -b * scale;
    to->r2c1 = -c * scale;
    to->r2c2 = a * scale;

    return 1;
}
// Writes the inverse of the double-precision matrix `from` into `to`.
//
// Returns 0 without touching `to` when the determinant of `from` lies within
// [-BG_FP64_EPSYLON, BG_FP64_EPSYLON]; otherwise fills `to` and returns 1.
// All elements of `from` are read before `to` is written, so passing the same
// matrix for both arguments works.
int bg_fp64_matrix2x2_set_inverted(const BgFP64Matrix2x2* from, BgFP64Matrix2x2* to)
{
    const double det = bg_fp64_matrix2x2_get_determinant(from);
    const int is_singular = (-BG_FP64_EPSYLON <= det) && (det <= BG_FP64_EPSYLON);

    if (is_singular) {
        return 0;
    }

    const double scale = 1.0 / det;

    const double a = from->r1c1;
    const double b = from->r1c2;
    const double c = from->r2c1;
    const double d = from->r2c2;

    // Swap the diagonal, negate the off-diagonal, then scale by 1 / determinant.
    to->r1c1 = d * scale;
    to->r1c2 = -b * scale;
    to->r2c1 = -c * scale;
    to->r2c2 = a * scale;

    return 1;
}

View file

@ -171,9 +171,55 @@ static inline void bg_fp64_matrix2x2_transpose(BgFP64Matrix2x2* matrix)
// ================= Inversion ================== // // ================= Inversion ================== //
// Inverts a single-precision 2x2 matrix in place.
// Returns 0 (matrix unchanged) when the determinant lies within
// [-BG_FP32_EPSYLON, BG_FP32_EPSYLON]; otherwise stores the inverse and returns 1.
// NOTE(review): the prototypes that share a line with the definition below look like
// residue of the removed declarations from a side-by-side diff view - confirm.
int bg_fp32_matrix2x2_invert(BgFP32Matrix2x2* matrix); static inline int bg_fp32_matrix2x2_invert(BgFP32Matrix2x2* matrix)
{
const float determinant = bg_fp32_matrix2x2_get_determinant(matrix);
int bg_fp64_matrix2x2_invert(BgFP64Matrix2x2* matrix); if (-BG_FP32_EPSYLON <= determinant && determinant <= BG_FP32_EPSYLON) {
return 0;
}
// Copy the swapped / negated elements first so the in-place writes below
// never read a cell that has already been overwritten.
const float r1c1 = matrix->r2c2;
const float r1c2 = -matrix->r1c2;
const float r2c1 = -matrix->r2c1;
const float r2c2 = matrix->r1c1;
// One division, then four multiplications.
const float multiplier = 1.0f / determinant;
matrix->r1c1 = r1c1 * multiplier;
matrix->r1c2 = r1c2 * multiplier;
matrix->r2c1 = r2c1 * multiplier;
matrix->r2c2 = r2c2 * multiplier;
return 1;
}
// Inverts a double-precision 2x2 matrix in place.
//
// Returns 0 and leaves the matrix untouched when its determinant lies within
// [-BG_FP64_EPSYLON, BG_FP64_EPSYLON]; otherwise overwrites the matrix with
// its inverse and returns 1.
static inline int bg_fp64_matrix2x2_invert(BgFP64Matrix2x2* matrix)
{
    const double det = bg_fp64_matrix2x2_get_determinant(matrix);
    const int is_singular = (-BG_FP64_EPSYLON <= det) && (det <= BG_FP64_EPSYLON);

    if (is_singular) {
        return 0;
    }

    const double scale = 1.0 / det;

    // Snapshot every element first: each output cell depends on a different input cell.
    const double a = matrix->r1c1;
    const double b = matrix->r1c2;
    const double c = matrix->r2c1;
    const double d = matrix->r2c2;

    // Swap the diagonal, negate the off-diagonal, then scale by 1 / determinant.
    matrix->r1c1 = d * scale;
    matrix->r1c2 = -b * scale;
    matrix->r2c1 = -c * scale;
    matrix->r2c2 = a * scale;

    return 1;
}
// =============== Set Transposed =============== // // =============== Set Transposed =============== //
@ -201,9 +247,55 @@ static inline void bg_fp64_matrix2x2_set_transposed(const BgFP64Matrix2x2* from,
// ================ Set Inverted ================ // // ================ Set Inverted ================ //
// Writes the inverse of the single-precision matrix `from` into `to`.
// Returns 0 (`to` unchanged) when the determinant of `from` lies within
// [-BG_FP32_EPSYLON, BG_FP32_EPSYLON]; otherwise fills `to` and returns 1.
// NOTE(review): the prototypes that share a line with the definition below look like
// residue of the removed declarations from a side-by-side diff view - confirm.
int bg_fp32_matrix2x2_set_inverted(const BgFP32Matrix2x2* from, BgFP32Matrix2x2* to); static inline int bg_fp32_matrix2x2_set_inverted(const BgFP32Matrix2x2* from, BgFP32Matrix2x2* to)
{
const float determinant = bg_fp32_matrix2x2_get_determinant(from);
int bg_fp64_matrix2x2_set_inverted(const BgFP64Matrix2x2* from, BgFP64Matrix2x2* to); if (-BG_FP32_EPSYLON <= determinant && determinant <= BG_FP32_EPSYLON) {
return 0;
}
// Read every source element before writing, so `from` and `to` may be the same matrix.
const float r1c1 = from->r2c2;
const float r1c2 = -from->r1c2;
const float r2c1 = -from->r2c1;
const float r2c2 = from->r1c1;
// One division, then four multiplications.
const float multiplier = 1.0f / determinant;
to->r1c1 = r1c1 * multiplier;
to->r1c2 = r1c2 * multiplier;
to->r2c1 = r2c1 * multiplier;
to->r2c2 = r2c2 * multiplier;
return 1;
}
// Writes the inverse of the double-precision matrix `from` into `to`.
//
// Returns 0 without touching `to` when the determinant of `from` lies within
// [-BG_FP64_EPSYLON, BG_FP64_EPSYLON]; otherwise fills `to` and returns 1.
// All elements of `from` are read before `to` is written, so passing the same
// matrix for both arguments works.
static inline int bg_fp64_matrix2x2_set_inverted(const BgFP64Matrix2x2* from, BgFP64Matrix2x2* to)
{
    const double det = bg_fp64_matrix2x2_get_determinant(from);
    const int is_singular = (-BG_FP64_EPSYLON <= det) && (det <= BG_FP64_EPSYLON);

    if (is_singular) {
        return 0;
    }

    const double scale = 1.0 / det;

    const double a = from->r1c1;
    const double b = from->r1c2;
    const double c = from->r2c1;
    const double d = from->r2c2;

    // Swap the diagonal, negate the off-diagonal, then scale by 1 / determinant.
    to->r1c1 = d * scale;
    to->r1c2 = -b * scale;
    to->r2c1 = -c * scale;
    to->r2c2 = a * scale;

    return 1;
}
// ================= Set Row 1 ================== // // ================= Set Row 1 ================== //

View file

@ -1,233 +1,2 @@
#include "quaternion.h" #include "quaternion.h"
// =============== Normalization ================ //
// Scales a single-precision quaternion to unit modulus in place.
//
// Returns 1 when the square modulus is already within BG_FP32_TWO_EPSYLON of 1
// (quaternion left as is) or after a successful rescale. Returns 0 when the
// square modulus does not exceed BG_FP32_SQUARE_EPSYLON; in that case the
// quaternion is passed to bg_fp32_quaternion_reset().
int bg_fp32_quaternion_normalize(BgFP32Quaternion* quaternion)
{
    const float sq_mod = bg_fp32_quaternion_get_square_modulus(quaternion);

    const int already_unit =
        (1.0f - BG_FP32_TWO_EPSYLON <= sq_mod) && (sq_mod <= 1.0f + BG_FP32_TWO_EPSYLON);

    if (already_unit) {
        return 1;
    }

    if (sq_mod <= BG_FP32_SQUARE_EPSYLON) {
        bg_fp32_quaternion_reset(quaternion);
        return 0;
    }

    const float scale = sqrtf(1.0f / sq_mod);

    quaternion->s0 *= scale;
    quaternion->x1 *= scale;
    quaternion->x2 *= scale;
    quaternion->x3 *= scale;

    return 1;
}
// Scales a double-precision quaternion to unit modulus in place.
//
// Returns 1 when the square modulus is already within BG_FP64_TWO_EPSYLON of 1
// (quaternion left as is) or after a successful rescale. Returns 0 when the
// square modulus is too small to normalize; in that case the quaternion is
// passed to bg_fp64_quaternion_reset().
int bg_fp64_quaternion_normalize(BgFP64Quaternion* quaternion)
{
    const double square_modulus = bg_fp64_quaternion_get_square_modulus(quaternion);

    if (1.0 - BG_FP64_TWO_EPSYLON <= square_modulus && square_modulus <= 1.0 + BG_FP64_TWO_EPSYLON) {
        return 1;
    }

    // The degenerate-length threshold must be a double-precision tolerance.
    // This used to compare against BG_FP32_SQUARE_EPSYLON, the single-precision
    // constant, which rejected quaternions that are perfectly normalizable in
    // double precision.
    // NOTE(review): if the project defines BG_FP64_SQUARE_EPSYLON, prefer it here.
    if (square_modulus <= BG_FP64_EPSYLON * BG_FP64_EPSYLON) {
        bg_fp64_quaternion_reset(quaternion);
        return 0;
    }

    const double multiplier = sqrt(1.0 / square_modulus);

    quaternion->s0 *= multiplier;
    quaternion->x1 *= multiplier;
    quaternion->x2 *= multiplier;
    quaternion->x3 *= multiplier;

    return 1;
}
// ============ Make Rotation Matrix ============ //
// Fills `matrix` with the 3x3 matrix derived from a single-precision quaternion.
//
// Every term is divided by the quaternion's square modulus, so the input does
// not have to be normalized. When the square modulus lies within
// [-BG_FP32_EPSYLON, BG_FP32_EPSYLON] the matrix is set to identity instead.
void bg_fp32_quaternion_get_rotation_matrix(const BgFP32Quaternion* quaternion, BgFP32Matrix3x3* matrix)
{
    const float s = quaternion->s0;
    const float i = quaternion->x1;
    const float j = quaternion->x2;
    const float k = quaternion->x3;

    const float ss = s * s;
    const float ii = i * i;
    const float jj = j * j;
    const float kk = k * k;

    const float sq_mod = (ss + ii) + (jj + kk);
    const int is_degenerate = (-BG_FP32_EPSYLON <= sq_mod) && (sq_mod <= BG_FP32_EPSYLON);

    if (is_degenerate) {
        bg_fp32_matrix3x3_set_to_identity(matrix);
        return;
    }

    const float diag_scale = 1.0f / sq_mod;
    const float off_scale = 2.0f * diag_scale;

    const float si = s * i;
    const float sj = s * j;
    const float sk = s * k;
    const float ij = i * j;
    const float ik = i * k;
    const float jk = j * k;

    // Row 1
    matrix->r1c1 = diag_scale * ((ss + ii) - (jj + kk));
    matrix->r1c2 = off_scale * (ij - sk);
    matrix->r1c3 = off_scale * (ik + sj);

    // Row 2
    matrix->r2c1 = off_scale * (ij + sk);
    matrix->r2c2 = diag_scale * ((ss + jj) - (ii + kk));
    matrix->r2c3 = off_scale * (jk - si);

    // Row 3
    matrix->r3c1 = off_scale * (ik - sj);
    matrix->r3c2 = off_scale * (jk + si);
    matrix->r3c3 = diag_scale * ((ss + kk) - (ii + jj));
}
// Fills `matrix` with the 3x3 matrix derived from a double-precision quaternion.
//
// Every term is divided by the quaternion's square modulus, so the input does
// not have to be normalized. When the square modulus lies within
// [-BG_FP64_EPSYLON, BG_FP64_EPSYLON] the matrix is set to identity instead.
void bg_fp64_quaternion_get_rotation_matrix(const BgFP64Quaternion* quaternion, BgFP64Matrix3x3* matrix)
{
    const double s = quaternion->s0;
    const double i = quaternion->x1;
    const double j = quaternion->x2;
    const double k = quaternion->x3;

    const double ss = s * s;
    const double ii = i * i;
    const double jj = j * j;
    const double kk = k * k;

    const double sq_mod = (ss + ii) + (jj + kk);
    const int is_degenerate = (-BG_FP64_EPSYLON <= sq_mod) && (sq_mod <= BG_FP64_EPSYLON);

    if (is_degenerate) {
        bg_fp64_matrix3x3_set_to_identity(matrix);
        return;
    }

    const double diag_scale = 1.0 / sq_mod;
    const double off_scale = 2.0 * diag_scale;

    const double si = s * i;
    const double sj = s * j;
    const double sk = s * k;
    const double ij = i * j;
    const double ik = i * k;
    const double jk = j * k;

    // Row 1
    matrix->r1c1 = diag_scale * ((ss + ii) - (jj + kk));
    matrix->r1c2 = off_scale * (ij - sk);
    matrix->r1c3 = off_scale * (ik + sj);

    // Row 2
    matrix->r2c1 = off_scale * (ij + sk);
    matrix->r2c2 = diag_scale * ((ss + jj) - (ii + kk));
    matrix->r2c3 = off_scale * (jk - si);

    // Row 3
    matrix->r3c1 = off_scale * (ik - sj);
    matrix->r3c2 = off_scale * (jk + si);
    matrix->r3c3 = diag_scale * ((ss + kk) - (ii + jj));
}
// ============ Make Reverse Matrix ============= //
// Fills `matrix` with the reverse 3x3 matrix of a single-precision quaternion.
//
// The diagonal matches bg_fp32_quaternion_get_rotation_matrix(); the
// off-diagonal terms have the opposite sign on the scalar products, which
// makes the result the transpose of that matrix. Every term is divided by the
// square modulus. When the square modulus lies within
// [-BG_FP32_EPSYLON, BG_FP32_EPSYLON] the matrix is set to identity instead.
void bg_fp32_quaternion_get_reverse_matrix(const BgFP32Quaternion* quaternion, BgFP32Matrix3x3* matrix)
{
    const float s = quaternion->s0;
    const float i = quaternion->x1;
    const float j = quaternion->x2;
    const float k = quaternion->x3;

    const float ss = s * s;
    const float ii = i * i;
    const float jj = j * j;
    const float kk = k * k;

    const float sq_mod = (ss + ii) + (jj + kk);
    const int is_degenerate = (-BG_FP32_EPSYLON <= sq_mod) && (sq_mod <= BG_FP32_EPSYLON);

    if (is_degenerate) {
        bg_fp32_matrix3x3_set_to_identity(matrix);
        return;
    }

    const float diag_scale = 1.0f / sq_mod;
    const float off_scale = 2.0f * diag_scale;

    const float si = s * i;
    const float sj = s * j;
    const float sk = s * k;
    const float ij = i * j;
    const float ik = i * k;
    const float jk = j * k;

    // Row 1
    matrix->r1c1 = diag_scale * ((ss + ii) - (jj + kk));
    matrix->r1c2 = off_scale * (ij + sk);
    matrix->r1c3 = off_scale * (ik - sj);

    // Row 2
    matrix->r2c1 = off_scale * (ij - sk);
    matrix->r2c2 = diag_scale * ((ss + jj) - (ii + kk));
    matrix->r2c3 = off_scale * (jk + si);

    // Row 3
    matrix->r3c1 = off_scale * (ik + sj);
    matrix->r3c2 = off_scale * (jk - si);
    matrix->r3c3 = diag_scale * ((ss + kk) - (ii + jj));
}
// Fills `matrix` with the reverse 3x3 matrix of a double-precision quaternion.
//
// The diagonal matches bg_fp64_quaternion_get_rotation_matrix(); the
// off-diagonal terms have the opposite sign on the scalar products, which
// makes the result the transpose of that matrix. Every term is divided by the
// square modulus. When the square modulus lies within
// [-BG_FP64_EPSYLON, BG_FP64_EPSYLON] the matrix is set to identity instead.
void bg_fp64_quaternion_get_reverse_matrix(const BgFP64Quaternion* quaternion, BgFP64Matrix3x3* matrix)
{
    const double s = quaternion->s0;
    const double i = quaternion->x1;
    const double j = quaternion->x2;
    const double k = quaternion->x3;

    const double ss = s * s;
    const double ii = i * i;
    const double jj = j * j;
    const double kk = k * k;

    const double sq_mod = (ss + ii) + (jj + kk);
    const int is_degenerate = (-BG_FP64_EPSYLON <= sq_mod) && (sq_mod <= BG_FP64_EPSYLON);

    if (is_degenerate) {
        bg_fp64_matrix3x3_set_to_identity(matrix);
        return;
    }

    const double diag_scale = 1.0 / sq_mod;
    const double off_scale = 2.0 * diag_scale;

    const double si = s * i;
    const double sj = s * j;
    const double sk = s * k;
    const double ij = i * j;
    const double ik = i * k;
    const double jk = j * k;

    // Row 1
    matrix->r1c1 = diag_scale * ((ss + ii) - (jj + kk));
    matrix->r1c2 = off_scale * (ij + sk);
    matrix->r1c3 = off_scale * (ik - sj);

    // Row 2
    matrix->r2c1 = off_scale * (ij - sk);
    matrix->r2c2 = diag_scale * ((ss + jj) - (ii + kk));
    matrix->r2c3 = off_scale * (jk + si);

    // Row 3
    matrix->r3c1 = off_scale * (ik + sj);
    matrix->r3c2 = off_scale * (jk - si);
    matrix->r3c3 = diag_scale * ((ss + kk) - (ii + jj));
}
// ================== Product =================== //
// Computes the single-precision quaternion product left * right into `product`.
//
// Both operands are copied into locals before `product` is written, so
// `product` may point to the same quaternion as `left` or `right`.
void bg_fp32_quaternion_get_product(const BgFP32Quaternion* left, const BgFP32Quaternion* right, BgFP32Quaternion* product)
{
    const float ls = left->s0;
    const float l1 = left->x1;
    const float l2 = left->x2;
    const float l3 = left->x3;

    const float rs = right->s0;
    const float r1 = right->x1;
    const float r2 = right->x2;
    const float r3 = right->x3;

    // Scalar part, then the three vector components.
    product->s0 = (ls * rs - l1 * r1) - (l2 * r2 + l3 * r3);
    product->x1 = (l1 * rs + ls * r1) - (l3 * r2 - l2 * r3);
    product->x2 = (l2 * rs + ls * r2) - (l1 * r3 - l3 * r1);
    product->x3 = (l3 * rs + ls * r3) - (l2 * r1 - l1 * r2);
}
// Computes the double-precision quaternion product left * right into `product`.
//
// Both operands are copied into locals before `product` is written, so
// `product` may point to the same quaternion as `left` or `right`.
void bg_fp64_quaternion_get_product(const BgFP64Quaternion* left, const BgFP64Quaternion* right, BgFP64Quaternion* product)
{
    const double ls = left->s0;
    const double l1 = left->x1;
    const double l2 = left->x2;
    const double l3 = left->x3;

    const double rs = right->s0;
    const double r1 = right->x1;
    const double r2 = right->x2;
    const double r3 = right->x3;

    // Scalar part, then the three vector components.
    product->s0 = (ls * rs - l1 * r1) - (l2 * r2 + l3 * r3);
    product->x1 = (l1 * rs + ls * r1) - (l3 * r2 - l2 * r3);
    product->x2 = (l2 * rs + ls * r2) - (l1 * r3 - l3 * r1);
    product->x3 = (l3 * rs + ls * r3) - (l2 * r1 - l1 * r2);
}

View file

Internal server error - Personal Git Server: Beyond coding. We Forge.

500

Internal server error

Forgejo version: 11.0.1+gitea-1.22.0

@ -183,21 +183,207 @@ static inline double bg_fp64_quaternion_get_modulus(const BgFP64Quaternion* quat
// =============== Normalization ================ // // =============== Normalization ================ //
// Scales a single-precision quaternion to unit modulus in place.
// Returns 1 if it was already within tolerance of unit length or was rescaled;
// returns 0 after calling bg_fp32_quaternion_reset() when the square modulus
// does not exceed BG_FP32_SQUARE_EPSYLON.
// NOTE(review): the prototypes that share a line with the definition below look like
// residue of the removed declarations from a side-by-side diff view - confirm.
int bg_fp32_quaternion_normalize(BgFP32Quaternion* quaternion); static inline int bg_fp32_quaternion_normalize(BgFP32Quaternion* quaternion)
{
const float square_modulus = bg_fp32_quaternion_get_square_modulus(quaternion);
int bg_fp64_quaternion_normalize(BgFP64Quaternion* quaternion); if (1.0f - BG_FP32_TWO_EPSYLON <= square_modulus && square_modulus <= 1.0f + BG_FP32_TWO_EPSYLON) {
// Already unit length within tolerance: nothing to do.
return 1;
}
if (square_modulus <= BG_FP32_SQUARE_EPSYLON) {
// Too short to normalize.
bg_fp32_quaternion_reset(quaternion);
return 0;
}
// 1 / modulus, computed from the square modulus.
const float multiplier = sqrtf(1.0f / square_modulus);
quaternion->s0 *= multiplier;
quaternion->x1 *= multiplier;
quaternion->x2 *= multiplier;
quaternion->x3 *= multiplier;
return 1;
}
// Scales a double-precision quaternion to unit modulus in place.
//
// Returns 1 when the square modulus is already within BG_FP64_TWO_EPSYLON of 1
// (quaternion left as is) or after a successful rescale. Returns 0 when the
// square modulus is too small to normalize; in that case the quaternion is
// passed to bg_fp64_quaternion_reset().
static inline int bg_fp64_quaternion_normalize(BgFP64Quaternion* quaternion)
{
    const double square_modulus = bg_fp64_quaternion_get_square_modulus(quaternion);

    if (1.0 - BG_FP64_TWO_EPSYLON <= square_modulus && square_modulus <= 1.0 + BG_FP64_TWO_EPSYLON) {
        return 1;
    }

    // The degenerate-length threshold must be a double-precision tolerance.
    // This used to compare against BG_FP32_SQUARE_EPSYLON, the single-precision
    // constant, which rejected quaternions that are perfectly normalizable in
    // double precision.
    // NOTE(review): if the project defines BG_FP64_SQUARE_EPSYLON, prefer it here.
    if (square_modulus <= BG_FP64_EPSYLON * BG_FP64_EPSYLON) {
        bg_fp64_quaternion_reset(quaternion);
        return 0;
    }

    const double multiplier = sqrt(1.0 / square_modulus);

    quaternion->s0 *= multiplier;
    quaternion->x1 *= multiplier;
    quaternion->x2 *= multiplier;
    quaternion->x3 *= multiplier;

    return 1;
}
// ============ Make Rotation Matrix ============ // // ============ Make Rotation Matrix ============ //
// Fills `matrix` with the 3x3 matrix derived from a single-precision quaternion.
// All terms are divided by the square modulus, so the quaternion need not be unit length;
// if the square modulus lies within [-BG_FP32_EPSYLON, BG_FP32_EPSYLON], `matrix` is set to identity.
// NOTE(review): the prototypes that share a line with the code below look like
// residue of the removed declarations from a side-by-side diff view - confirm.
void bg_fp32_quaternion_get_rotation_matrix(const BgFP32Quaternion* quaternion, BgFP32Matrix3x3* matrix); void bg_fp32_quaternion_get_rotation_matrix(const BgFP32Quaternion* quaternion, BgFP32Matrix3x3* matrix)
{
// Squares of the four components.
const float s0s0 = quaternion->s0 * quaternion->s0;
const float x1x1 = quaternion->x1 * quaternion->x1;
const float x2x2 = quaternion->x2 * quaternion->x2;
const float x3x3 = quaternion->x3 * quaternion->x3;
void bg_fp64_quaternion_get_rotation_matrix(const BgFP64Quaternion* quaternion, BgFP64Matrix3x3* matrix); const float square_modulus = (s0s0 + x1x1) + (x2x2 + x3x3);
if (-BG_FP32_EPSYLON <= square_modulus && square_modulus <= BG_FP32_EPSYLON)
{
bg_fp32_matrix3x3_set_to_identity(matrix);
return;
}
// corrector1 scales the diagonal terms, corrector2 (twice as large) the off-diagonal ones.
const float corrector1 = 1.0f / square_modulus;
const float corrector2 = 2.0f * corrector1;
// Pairwise products of distinct components.
const float s0x1 = quaternion->s0 * quaternion->x1;
const float s0x2 = quaternion->s0 * quaternion->x2;
const float s0x3 = quaternion->s0 * quaternion->x3;
const float x1x2 = quaternion->x1 * quaternion->x2;
const float x1x3 = quaternion->x1 * quaternion->x3;
const float x2x3 = quaternion->x2 * quaternion->x3;
// Diagonal.
matrix->r1c1 = corrector1 * ((s0s0 + x1x1) - (x2x2 + x3x3));
matrix->r2c2 = corrector1 * ((s0s0 + x2x2) - (x1x1 + x3x3));
matrix->r3c3 = corrector1 * ((s0s0 + x3x3) - (x1x1 + x2x2));
// Off-diagonal terms with the scalar product subtracted.
matrix->r1c2 = corrector2 * (x1x2 - s0x3);
matrix->r2c3 = corrector2 * (x2x3 - s0x1);
matrix->r3c1 = corrector2 * (x1x3 - s0x2);
// Off-diagonal terms with the scalar product added.
matrix->r2c1 = corrector2 * (x1x2 + s0x3);
matrix->r3c2 = corrector2 * (x2x3 + s0x1);
matrix->r1c3 = corrector2 * (x1x3 + s0x2);
}
void bg_fp64_quaternion_get_rotation_matrix(const BgFP64Quaternion* quaternion, BgFP64Matrix3x3* matrix)
{
const double s0s0 = quaternion->s0 * quaternion->s0;
const double x1x1 = quaternion->x1 * quaternion->x1;
const double x2x2 = quaternion->x2 * quaternion->x2;
const double x3x3 = quaternion->x3 * quaternion->x3;
const double square_modulus = (s0s0 + x1x1) + (x2x2 + x3x3);
if (-BG_FP64_EPSYLON <= square_modulus && square_modulus <= BG_FP64_EPSYLON)
{
bg_fp64_matrix3x3_set_to_identity(matrix);
return;
}
const double corrector1 = 1.0f / square_modulus;
const double corrector2 = 2.0f * corrector1;
const double s0x1 = quaternion->s0 * quaternion->x1;
const double s0x2 = quaternion->s0 * quaternion->x2;
const double s0x3 = quaternion->s0 * quaternion->x3;
const double x1x2 = quaternion->x1 * quaternion->x2;
const double x1x3 = quaternion->x1 * quaternion->x3;
const double x2x3 = quaternion->x2 * quaternion->x3;
matrix->r1c1 = corrector1 * ((s0s0 + x1x1) - (x2x2 + x3x3));
matrix->r2c2 = corrector1 * ((s0s0 + x2x2) - (x1x1 + x3x3));
matrix->r3c3 = corrector1 * ((s0s0 + x3x3) - (x1x1 + x2x2));
matrix->r1c2 = corrector2 * (x1x2 - s0x3);
matrix->r2c3 = corrector2 * (x2x3 - s0x1);
matrix->r3c1 = corrector2 * (x1x3 - s0x2);
matrix->r2c1 = corrector2 * (x1x2 + s0x3);
matrix->r3c2 = corrector2 * (x2x3 + s0x1);