Тестирование производительности: версоры + матрица + поворот

2024-11-27 02:51:07 +07:00 · 2024-11-27 02:51:07 +07:00 · a30629df67
commit a30629df67
parent 5d4472150b
3 changed files with 78 additions and 4 deletions
--- a/basic-geometry-dev/main.c
+++ b/basic-geometry-dev/main.c
@ -9,7 +9,46 @@
 #include <time.h>
 #endif // _WINDOWS_

-BgFP32Versor * allocate_versors(const unsigned int amount)
+// Allocates a zero-filled heap array holding `amount` BgFP32Vector3 elements.
+// Returns the array, or a null pointer when calloc cannot satisfy the request.
+// The array must eventually be released with free().
+BgFP32Vector3* allocate_vectors3(const unsigned int amount)
+{
+    BgFP32Vector3* const block = calloc(amount, sizeof *block);
+
+    return block;
+}
+
+// Builds an array of `amount` vectors and passes every element through
+// bg_fp32_vector3_reset (presumably this sets each vector to zero - confirm
+// against the vector3 header).
+// Returns the array, or 0 when the allocation fails.
+BgFP32Vector3* make_zero_vectors3(const unsigned int amount)
+{
+    BgFP32Vector3* const vectors = allocate_vectors3(amount);
+
+    if (vectors != 0) {
+        for (unsigned int index = 0; index < amount; index++) {
+            bg_fp32_vector3_reset(vectors + index);
+        }
+    }
+
+    return vectors;
+}
+
+// Builds an array of `amount` vectors whose three components are each drawn
+// from rand() and mapped linearly from [0, RAND_MAX] onto [-1.0f, 1.0f].
+// Returns the array, or 0 when the allocation fails.
+BgFP32Vector3* make_random_vectors3(const unsigned int amount)
+{
+    BgFP32Vector3* const vectors = allocate_vectors3(amount);
+
+    if (vectors == 0) {
+        return 0;
+    }
+
+    // rand() * scale lies in [0, 2]; subtracting 1 centres the range on zero.
+    const float scale = 2.0f / RAND_MAX;
+
+    for (BgFP32Vector3* item = vectors; item != vectors + amount; item++) {
+        item->x1 = rand() * scale - 1.0f;
+        item->x2 = rand() * scale - 1.0f;
+        item->x3 = rand() * scale - 1.0f;
+    }
+
+    return vectors;
+}
+
+BgFP32Versor* allocate_versors(const unsigned int amount) // Heap array of `amount` versors; null pointer if calloc fails.
 {
    return calloc(amount, sizeof(BgFP32Versor)); // calloc hands back zero-filled memory
 }
@ -160,6 +199,27 @@ int main()
        return 0;
    }

+    BgFP32Matrix3x3* matrixes =malloc(amount * sizeof(BgFP32Matrix3x3));
+
+    if (matrixes == 0) {
+        printf("Cannot allocate memory for matrixes");
+        free(results);
+        free(versors2);
+        free(versors1);
+        return 0;
+    }
+
+    BgFP32Vector3* vectors = make_random_vectors3(amount);
+
+    // Validate the array that was just created: the benchmark loop reads and
+    // writes vectors[i], so a failed allocation must stop the run here.
+    if (vectors == 0) {
+        printf("Cannot allocate memory for result vectors");
+        free(matrixes);
+        free(results);
+        free(versors2);
+        free(versors1);
+        return 0;
+    }
+
 #ifdef _WIN64
    end = GetTickCount64();
    printf("Setup time: %lld\n", end - now);
@ -173,9 +233,9 @@ int main()
 #endif // _WIN64
    for (int j = 0; j < 1000; j++) {
        for (unsigned int i = 0; i < amount; i++) {
-            bg_fp32_versor_shorten(&versors1[i]);
-            bg_fp32_versor_shorten(&versors2[i]);
-            //bg_fp32_versor_combine(&versors1[i], &versors2[i], &results[i]);
+            bg_fp32_versor_combine(&versors1[i], &versors2[i], &results[i]);
+            bg_fp32_versor_get_rotation_matrix(&versors1[i], &matrixes[i]);
+            bg_fp32_matrix3x3_right_product(&matrixes[i], &vectors[i], &vectors[i]);
        }
    }

@ -193,6 +253,8 @@ int main()
    print_versor(versors2 + 10);
    print_versor(results + 10);

+    free(vectors);
+    free(matrixes);
    free(results);
    free(versors2);
    free(versors1);
--- a/basic-geometry/versor.c
+++ b/basic-geometry/versor.c
@ -7,6 +7,16 @@ const BgFP32Versor BG_FP32_IDLE_VERSOR = { 1.0f, 0.0f, 0.0f, 0.0f };

 const BgFP64Versor BG_FP64_IDLE_VERSOR = { 1.0, 0.0, 0.0, 0.0 };

+// Scales all four components of `twin` in place by sqrtf(1 / square_modulus).
+//
+// square_modulus: presumably the squared modulus of `twin`, computed by the
+//                 caller - confirm at the call sites. There is no guard here,
+//                 so a zero or negative value produces non-finite components.
+// twin:           versor components to rescale; modified in place.
+void __bg_fp32_versor_normalize(const float square_modulus, __BgFP32DarkTwinVersor* twin)
+{
+    const float scale = sqrtf(1.0f / square_modulus);
+
+    twin->s0 = twin->s0 * scale;
+    twin->x1 = twin->x1 * scale;
+    twin->x2 = twin->x2 * scale;
+    twin->x3 = twin->x3 * scale;
+}
+
 // =============== Set Crude Turn =============== //

 void bg_fp32_versor_set_crude_turn(const float x1, const float x2, const float x3, const float angle, const angle_unit_t unit, BgFP32Versor* result)
--- a/basic-geometry/versor.h
+++ b/basic-geometry/versor.h
@ -408,6 +408,8 @@ static inline void bg_fp64_versor_set_inverted_fp32(const BgFP32Versor* versor,

 // ================ Combination ================= //

+__declspec(noinline) void __bg_fp32_versor_normalize(const float square_modulus, __BgFP32DarkTwinVersor* twin);
+
 static inline void bg_fp32_versor_combine(const BgFP32Versor* second, const BgFP32Versor* first, BgFP32Versor* result)
 {
    const float s0 = (second->s0 * first->s0 - second->x1 * first->x1) - (second->x2 * first->x2 + second->x3 * first->x3);