Github User Fetcher 1.0.0
C Application with Server and GUI
Loading...
Searching...
No Matches
graphene-simd4x4f.h
Go to the documentation of this file.
1/* graphene-simd4x4f.h: 4x4 float vector operations
2 *
3 * SPDX-License-Identifier: MIT
4 *
5 * Copyright 2014 Emmanuele Bassi
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a copy
8 * of this software and associated documentation files (the "Software"), to deal
9 * in the Software without restriction, including without limitation the rights
10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 * copies of the Software, and to permit persons to whom the Software is
12 * furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included in
15 * all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 * THE SOFTWARE.
24 */
25
26#pragma once
27
28#include "graphene-simd4f.h"
29
30#include <math.h>
31#include <float.h>
32
34
35/**
36 * graphene_simd4x4f_t:
37 *
38 * A SIMD-based matrix type that uses four #graphene_simd4f_t vectors.
39 *
40 * The matrix is treated as row-major, i.e. the x, y, z, and w vectors
41 * are the rows, and the four elements of each vector are that row's columns:
42 *
43 * |[<!-- language="C" -->
44 * graphene_simd4x4f_t = {
45 * x.x, x.y, x.z, x.w,
46 * y.x, y.y, y.z, y.w,
47 * z.x, z.y, z.z, z.w,
48 * w.x, w.y, w.z, w.w
49 * }
50 * ]|
51 *
52 * The contents of the #graphene_simd4x4f_t type are private and
53 * cannot be accessed directly; use the provided API instead.
54 *
55 * Since: 1.0
56 */
57
58/**
59 * graphene_simd4x4f_init:
60 * @x: a #graphene_simd4f_t for the first row
61 * @y: a #graphene_simd4f_t for the second row
62 * @z: a #graphene_simd4f_t for the third row
63 * @w: a #graphene_simd4f_t for the fourth row
64 *
65 * Creates a new #graphene_simd4x4f_t using the given row vectors
66 * to initialize it.
67 *
68 * Returns: the newly created #graphene_simd4x4f_t
69 *
70 * Since: 1.0
71 */
77{
79
80 s.x = x;
81 s.y = y;
82 s.z = z;
83 s.w = w;
84
85 return s;
86}
87
88/**
89 * graphene_simd4x4f_init_identity:
90 * @m: a #graphene_simd4x4f_t
91 *
92 * Initializes @m to be the identity matrix.
93 *
94 * Since: 1.0
95 */
96static inline void
98{
99 *m = graphene_simd4x4f_init (graphene_simd4f_init (1.0f, 0.0f, 0.0f, 0.0f),
100 graphene_simd4f_init (0.0f, 1.0f, 0.0f, 0.0f),
101 graphene_simd4f_init (0.0f, 0.0f, 1.0f, 0.0f),
102 graphene_simd4f_init (0.0f, 0.0f, 0.0f, 1.0f));
103}
104
105/**
106 * graphene_simd4x4f_init_from_float:
107 * @m: a #graphene_simd4x4f_t
108 * @f: (array fixed-size=16): an array of 16 floating point values
109 *
110 * Initializes a #graphene_simd4x4f_t with the given array
111 * of floating point values.
112 *
113 * Since: 1.0
114 */
115static inline void
117 const float *f)
118{
119 m->x = graphene_simd4f_init_4f (f + 0);
120 m->y = graphene_simd4f_init_4f (f + 4);
121 m->z = graphene_simd4f_init_4f (f + 8);
122 m->w = graphene_simd4f_init_4f (f + 12);
123}
124
125/**
126 * graphene_simd4x4f_to_float:
127 * @m: a #graphene_simd4x4f_t
128 * @v: (out caller-allocates) (array fixed-size=16): an array of
129 * floating point values capable of holding at least 16 values
130 *
131 * Copies the content of @m into a float array.
132 *
133 * Since: 1.0
134 */
135static inline void
137 float *v)
138{
139 graphene_simd4f_dup_4f (m->x, v + 0);
140 graphene_simd4f_dup_4f (m->y, v + 4);
141 graphene_simd4f_dup_4f (m->z, v + 8);
142 graphene_simd4f_dup_4f (m->w, v + 12);
143}
144
147
148#if defined(GRAPHENE_USE_SSE)
149
150#ifdef __GNUC__
151#define graphene_simd4x4f_transpose_in_place(s) \
152 (__extension__ ({ \
153 _MM_TRANSPOSE4_PS ((s)->x, (s)->y, (s)->z, (s)->w); \
154 }))
155#elif defined (_MSC_VER)
156#define graphene_simd4x4f_transpose_in_place(s) \
157 _MM_TRANSPOSE4_PS ((s)->x, (s)->y, (s)->z, (s)->w)
158#endif
159
160#elif defined(GRAPHENE_USE_GCC)
161
162#define graphene_simd4x4f_transpose_in_place(s) \
163 (__extension__ ({ \
164 const graphene_simd4f_t sx = (s)->x; \
165 const graphene_simd4f_t sy = (s)->y; \
166 const graphene_simd4f_t sz = (s)->z; \
167 const graphene_simd4f_t sw = (s)->w; \
168 (s)->x = graphene_simd4f_init (sx[0], sy[0], sz[0], sw[0]); \
169 (s)->y = graphene_simd4f_init (sx[1], sy[1], sz[1], sw[1]); \
170 (s)->z = graphene_simd4f_init (sx[2], sy[2], sz[2], sw[2]); \
171 (s)->w = graphene_simd4f_init (sx[3], sy[3], sz[3], sw[3]); \
172 }))
173
174#elif defined(GRAPHENE_USE_ARM_NEON)
175
176# ifdef __GNUC__
177
178#define graphene_simd4x4f_transpose_in_place(s) \
179 (__extension__ ({ \
180 const graphene_simd4f_union_t sx = { (s)->x }; \
181 const graphene_simd4f_union_t sy = { (s)->y }; \
182 const graphene_simd4f_union_t sz = { (s)->z }; \
183 const graphene_simd4f_union_t sw = { (s)->w }; \
184 (s)->x = graphene_simd4f_init (sx.f[0], sy.f[0], sz.f[0], sw.f[0]); \
185 (s)->y = graphene_simd4f_init (sx.f[1], sy.f[1], sz.f[1], sw.f[1]); \
186 (s)->z = graphene_simd4f_init (sx.f[2], sy.f[2], sz.f[2], sw.f[2]); \
187 (s)->w = graphene_simd4f_init (sx.f[3], sy.f[3], sz.f[3], sw.f[3]); \
188 }))
189
190# elif defined (_MSC_VER)
191
192#define graphene_simd4x4f_transpose_in_place(s) _simd4x4f_transpose_in_place(s)
193static inline void
194_simd4x4f_transpose_in_place (graphene_simd4x4f_t *s)
195{
196 const graphene_simd4f_union_t sx = { (s)->x };
197 const graphene_simd4f_union_t sy = { (s)->y };
198 const graphene_simd4f_union_t sz = { (s)->z };
199 const graphene_simd4f_union_t sw = { (s)->w };
200 (s)->x = graphene_simd4f_init (sx.f[0], sy.f[0], sz.f[0], sw.f[0]);
201 (s)->y = graphene_simd4f_init (sx.f[1], sy.f[1], sz.f[1], sw.f[1]);
202 (s)->z = graphene_simd4f_init (sx.f[2], sy.f[2], sz.f[2], sw.f[2]);
203 (s)->w = graphene_simd4f_init (sx.f[3], sy.f[3], sz.f[3], sw.f[3]);
204}
205
206# endif
207
208#elif defined(GRAPHENE_USE_SCALAR)
209
210#define graphene_simd4x4f_transpose_in_place(s) \
211 (graphene_simd4x4f_transpose_in_place ((graphene_simd4x4f_t *) (s)))
212
213#else
214# error "No implementation for graphene_simd4x4f_t defined."
215#endif
216
217/**
218 * graphene_simd4x4f_sum:
219 * @a: a #graphene_simd4x4f_t
220 * @res: (out): return location for the sum vector
221 *
222 * Adds all the row vectors of @a.
223 *
224 * Since: 1.0
225 */
226static inline void
229{
231 s = graphene_simd4f_add (s, a->z);
232 s = graphene_simd4f_add (s, a->w);
233 *res = s;
234}
235
236/**
237 * graphene_simd4x4f_vec4_mul:
238 * @a: a #graphene_simd4x4f_t
239 * @b: a #graphene_simd4f_t
240 * @res: (out): return location for a #graphene_simd4f_t
241 *
242 * Left multiplies the given #graphene_simd4x4f_t with the given
243 * #graphene_simd4f_t row vector using a dot product:
244 *
245 * |[<!-- language="plain" -->
246 * res = b × A
247 *
248 * = ⎡x⎤ ⎛ x.x x.y x.z x.w ⎞
249 * ⎜y⎟ ⎜ y.x y.y y.z y.w ⎟
250 * ⎜z⎟ ⎜ z.x z.y z.z z.w ⎟
251 * ⎣w⎦ ⎝ w.x w.y w.z w.w ⎠
252 *
253 * = [ x.x × x x.y × x x.z × x x.w × x ]
254 * + + + +
255 * [ y.x × y y.y × y y.z × y y.w × y ]
256 * + + + +
257 * [ z.x × z z.y × z z.z × z z.w × z ]
258 * + + + +
259 * [ w.x × w w.y × w w.z × w w.w × w ]
260 *
261 * = ⎡ x.x × x + y.x × y + z.x × z + w.x × w ⎤
262 * ⎜ x.y × x + y.y × y + z.y × z + w.y × w ⎟
263 * ⎜ x.z × x + y.z × y + z.z × z + w.z × w ⎟
264 * ⎣ x.w × x + y.w × y + z.w × z + w.w × w ⎦
265 * ]|
266 *
267 * Since: 1.0
268 */
269static inline void
285
286/**
287 * graphene_simd4x4f_vec3_mul:
288 * @m: a #graphene_simd4x4f_t
289 * @v: a #graphene_simd4f_t
290 * @res: (out): return location for a #graphene_simd4f_t
291 *
292 * Left multiplies the given #graphene_simd4x4f_t with the given
293 * #graphene_simd4f_t, using only the first three row vectors
294 * of the matrix, and the first three components of the vector;
295 * the W components of the matrix and vector are ignored:
296 *
297 * |[<!-- language="plain" -->
298 * res = v × m
299 *
300 * = ⎡x⎤ ⎛ x.x x.y x.z ⎞
301 * ⎜y⎟ ⎜ y.x y.y y.z ⎟
302 * ⎣z⎦ ⎝ z.x z.y z.z ⎠
303 *
304 * = [ x.x × x x.y × x x.z × x ]
305 * + + +
306 * [ y.x × y y.y × y y.z × y ]
307 * + + +
308 * [ z.x × z z.y × z z.z × z ]
309 *
310 * = ⎡ x.x × x + y.x × y + z.x × z ⎤
311 * ⎜ x.y × x + y.y × y + z.y × z ⎟
312 * ⎜ x.z × x + y.z × y + z.z × z ⎟
313 * ⎣ 0 ⎦
314 * ]|
315 *
316 * See also: graphene_simd4x4f_vec4_mul(), graphene_simd4x4f_point3_mul()
317 *
318 * Since: 1.0
319 */
320static inline void
335
336/**
337 * graphene_simd4x4f_point3_mul:
338 * @m: a #graphene_simd4x4f_t
339 * @p: a #graphene_simd4f_t
340 * @res: (out): return location for a #graphene_simd4f_t
341 *
342 * Multiplies the given #graphene_simd4x4f_t with the given
343 * #graphene_simd4f_t.
344 *
345 * Unlike graphene_simd4x4f_vec3_mul(), this function will
346 * use the W components of the matrix:
347 *
348 * |[<!-- language="plain" -->
349 * res = p × m
350 *
351 * = ⎡x⎤ ⎛ x.x x.y x.z x.w ⎞
352 * ⎜y⎟ ⎜ y.x y.y y.z y.w ⎟
353 * ⎜z⎟ ⎜ z.x z.y z.z z.w ⎟
354 * ⎣w⎦ ⎝ w.x w.y w.z w.w ⎠
355 *
356 * = [ x.x × x x.y × x x.z × x x.w × x ]
357 * + + + +
358 * [ y.x × y y.y × y y.z × y y.w × y ]
359 * + + + +
360 * [ z.x × z z.y × z z.z × z z.w × z ]
361 * + + + +
362 * [ w.x w.y w.z w.w ]
363 *
364 * = ⎡ x.x × x + y.x × y + z.x × z + w.x ⎤
365 * ⎜ x.y × x + y.y × y + z.y × z + w.y ⎟
366 * ⎜ x.z × x + y.z × y + z.z × z + w.z ⎟
367 * ⎣ x.w × x + y.w × y + z.w × z + w.w ⎦
368 * ]|
369 *
370 * Since: 1.0
371 */
372static inline void
387
388/**
389 * graphene_simd4x4f_transpose:
390 * @s: a #graphene_simd4x4f_t
391 * @res: (out): return location for the transposed matrix
392 *
393 * Transposes the given #graphene_simd4x4f_t.
394 *
395 * Since: 1.0
396 */
397static inline void
404
405/**
406 * graphene_simd4x4f_inv_ortho_vec3_mul:
407 * @a: a #graphene_simd4x4f_t
408 * @b: a #graphene_simd4f_t
409 * @res: (out): return location for the transformed vector
410 *
411 * Performs the inverse orthographic transformation of the first
412 * three components in the given vector, using the first three
413 * row vectors of the given SIMD matrix.
414 *
415 * Since: 1.0
416 */
417static inline void
419 const graphene_simd4f_t *b,
421{
422 graphene_simd4x4f_t transpose = *a;
423 graphene_simd4f_t translation = *b;
424
425 transpose.w = graphene_simd4f_init (0.f, 0.f, 0.f, 0.f);
427
428 graphene_simd4x4f_vec3_mul (&transpose, &translation, res);
429}
430
431/**
432 * graphene_simd4x4f_inv_ortho_point3_mul:
433 * @a: a #graphene_simd4x4f_t
434 * @b: a #graphene_simd4f_t
435 * @res: (out): return location for the result vector
436 *
437 * Performs the inverse orthographic transformation of the first
438 * three components in the given vector, using the given SIMD
439 * matrix.
440 *
441 * Unlike graphene_simd4x4f_inv_ortho_vec3_mul(), this function
442 * will also use the fourth row vector of the SIMD matrix.
443 *
444 * Since: 1.0
445 */
446static inline void
448 const graphene_simd4f_t *b,
450{
451 graphene_simd4f_t translation = graphene_simd4f_sub (*b, a->w);
452 graphene_simd4x4f_t transpose = *a;
453
454 transpose.w = graphene_simd4f_init (0.f, 0.f, 0.f, 0.f);
456
457 graphene_simd4x4f_point3_mul (&transpose, &translation, res);
458}
459
460/**
461 * graphene_simd4x4f_matrix_mul:
462 * @a: a #graphene_simd4x4f_t
463 * @b: a #graphene_simd4x4f_t
464 * @res: (out): return location for the result
465 *
466 * Multiplies the two matrices, following the convention:
467 *
468 * |[<!-- language="plain" -->
469 * res = A × B
470 *
471 * = ⎡ A.x × B ⎤
472 * ⎜ A.y × B ⎟
473 * ⎜ A.z × B ⎟
474 * ⎣ A.w × B ⎦
475 *
476 * = ⎡ res.x ⎤
477 * ⎜ res.y ⎟
478 * ⎜ res.z ⎟
479 * ⎣ res.w ⎦
480 * ]|
481 *
482 * See also: graphene_simd4x4f_vec4_mul()
483 *
484 * Since: 1.0
485 */
486static inline void
488 const graphene_simd4x4f_t *b,
490{
491#if 0
492 /* this is the classic naive A*B implementation of the row * column
493 * matrix product. using a SIMD scalar implementation, it's fairly
494 * slow at 329ns per multiplication; the SSE implementation makes it
495 * about 10x faster, at 32ns; the GCC vector implementation is only
496 * 5x faster, at 66ns. the biggest culprits are the transpose operation
497 * and the multiple, one lane reads to compute the scalar sum.
498 */
500
502
503 res->x =
508
509 res->y =
514
515 res->z =
520
521 res->w =
526#else
527 /* this is an optimized version of the matrix multiplication, using
528 * four dot products for each row vector. this yields drastically
529 * better numbers while retaining the same correct results as above:
530 * the scalar implementation now clocks at 91ns; the GCC vector
531 * implementation is 19ns; and the SSE implementation is 16ns.
532 *
533 * the order is correct if we want to multiply A with B; remember
534 * that matrix multiplication is non-commutative.
535 */
536 graphene_simd4f_t x, y, z, w;
537
538 graphene_simd4x4f_vec4_mul (b, &a->x, &x);
539 graphene_simd4x4f_vec4_mul (b, &a->y, &y);
540 graphene_simd4x4f_vec4_mul (b, &a->z, &z);
541 graphene_simd4x4f_vec4_mul (b, &a->w, &w);
542
543 *res = graphene_simd4x4f_init (x, y, z, w);
544#endif
545}
546
547/**
548 * graphene_simd4x4f_init_perspective:
549 * @m: a #graphene_simd4x4f_t
550 * @fovy_rad: the angle of the field of vision, in radians
551 * @aspect: the aspect value
552 * @z_near: the depth of the near clipping plane
553 * @z_far: the depth of the far clipping plane
554 *
555 * Initializes a #graphene_simd4x4f_t with a perspective projection.
556 *
557 * Since: 1.0
558 */
559static inline void
561 float fovy_rad,
562 float aspect,
563 float z_near,
564 float z_far)
565{
566 float delta_z = z_far - z_near;
567 float cotangent = tanf (GRAPHENE_PI_2 - fovy_rad * 0.5f);
568
569 float a = cotangent / aspect;
570 float b = cotangent;
571 float c = -(z_far + z_near) / delta_z;
572 float d = -2 * z_near * z_far / delta_z;
573
574 m->x = graphene_simd4f_init ( a, 0.0f, 0.0f, 0.0f);
575 m->y = graphene_simd4f_init (0.0f, b, 0.0f, 0.0f);
576 m->z = graphene_simd4f_init (0.0f, 0.0f, c, -1.0f);
577 m->w = graphene_simd4f_init (0.0f, 0.0f, d, 0.0f);
578}
579
580/**
581 * graphene_simd4x4f_init_ortho:
582 * @m: a #graphene_simd4x4f_t
583 * @left: edge of the left clipping plane
584 * @right: edge of the right clipping plane
585 * @bottom: edge of the bottom clipping plane
586 * @top: edge of the top clipping plane
587 * @z_near: depth of the near clipping plane
588 * @z_far: depth of the far clipping plane
589 *
590 * Initializes the given SIMD matrix with an orthographic projection.
591 *
592 * Since: 1.0
593 */
594static inline void
596 float left,
597 float right,
598 float bottom,
599 float top,
600 float z_near,
601 float z_far)
602{
603 float delta_x = right - left;
604 float delta_y = top - bottom;
605 float delta_z = z_far - z_near;
606
607 float a = 2.0f / delta_x;
608 float b = -(right + left) / delta_x;
609 float c = 2.0f / delta_y;
610 float d = -(top + bottom) / delta_y;
611 float e = -2.0f / delta_z;
612 float f = -(z_far + z_near) / delta_z;
613
614 m->x = graphene_simd4f_init ( a, 0.0f, 0.0f, 0.0f);
615 m->y = graphene_simd4f_init (0.0f, c, 0.0f, 0.0f);
616 m->z = graphene_simd4f_init (0.0f, 0.0f, e, 0.0f);
617 m->w = graphene_simd4f_init ( b, d, f, 1.0f);
618}
619
620/**
621 * graphene_simd4x4f_init_look_at:
622 * @m: a #graphene_simd4x4f_t
623 * @eye: vector for the camera coordinates
624 * @center: vector for the object coordinates
625 * @up: vector for the upwards direction
626 *
627 * Initializes a SIMD matrix with the projection necessary for
628 * the camera at the @eye coordinates to look at the object at
629 * the @center coordinates. The top of the camera is aligned to
630 * the @up vector.
631 *
632 * Since: 1.0
633 */
634static inline void
637 graphene_simd4f_t center,
639{
640 const graphene_simd4f_t direction = graphene_simd4f_sub (center, eye);
641 graphene_simd4f_t cross;
642 graphene_simd4f_t z_axis;
643 graphene_simd4f_t x_axis;
644 graphene_simd4f_t y_axis;
645 float eye_v[4];
646
647 if (graphene_simd4f_get_x (graphene_simd4f_dot3 (direction, direction)) < FLT_EPSILON)
648 /* eye and center are in the same position */
649 z_axis = graphene_simd4f_init (0, 0, 1, 0);
650 else
651 z_axis = graphene_simd4f_normalize3 (direction);
652
653 cross = graphene_simd4f_cross3 (z_axis, up);
654 if (graphene_simd4f_get_x (graphene_simd4f_dot3 (cross, cross)) < FLT_EPSILON)
655 {
656 graphene_simd4f_t tweak_z;
657
658 /* up and z_axis are parallel */
659 if (fabs (graphene_simd4f_get_z (up) - 1.0) < FLT_EPSILON)
660 tweak_z = graphene_simd4f_init (0.0001f, 0, 0, 0);
661 else
662 tweak_z = graphene_simd4f_init (0, 0, 0.0001f, 0);
663
664 z_axis = graphene_simd4f_add (z_axis, tweak_z);
665 z_axis = graphene_simd4f_normalize3 (z_axis);
666 cross = graphene_simd4f_cross3 (z_axis, up);
667 }
668
669 x_axis = graphene_simd4f_normalize3 (cross);
670 y_axis = graphene_simd4f_cross3 (x_axis, z_axis);
671
672 graphene_simd4f_dup_4f (eye, eye_v);
673
674 m->x = x_axis;
675 m->y = y_axis;
676 m->z = graphene_simd4f_neg (z_axis);
677 m->w = graphene_simd4f_init (-eye_v[0], -eye_v[1], -eye_v[2], 1.f);
678}
679
680/**
681 * graphene_simd4x4f_init_frustum:
682 * @m: a #graphene_simd4x4f_t
683 * @left: distance of the left clipping plane
684 * @right: distance of the right clipping plane
685 * @bottom: distance of the bottom clipping plane
686 * @top: distance of the top clipping plane
687 * @z_near: distance of the near clipping plane
688 * @z_far: distance of the far clipping plane
689 *
690 * Initializes a SIMD matrix with a frustum described by the distances
691 * of six clipping planes.
692 *
693 * Since: 1.2
694 */
695static inline void
697 float left,
698 float right,
699 float bottom,
700 float top,
701 float z_near,
702 float z_far)
703{
704 float x = 2.f * z_near / (right - left);
705 float y = 2.f * z_near / (top - bottom);
706
707 float a = (right + left) / (right - left);
708 float b = (top + bottom) / (top - bottom);
709 float c = -1.f * (z_far + z_near) / (z_far - z_near);
710 float d = -2.f * z_far * z_near / (z_far - z_near);
711
712 m->x = graphene_simd4f_init ( x, 0.f, 0.f, 0.f);
713 m->y = graphene_simd4f_init (0.f, y, 0.f, 0.f);
714 m->z = graphene_simd4f_init ( a, b, c, -1.f);
715 m->w = graphene_simd4f_init (0.f, 0.f, d, 0.f);
716}
717
718/**
719 * graphene_simd4x4f_perspective:
720 * @m: a #graphene_simd4x4f_t
721 * @depth: depth of the perspective
722 *
723 * Adds a perspective transformation for the given @depth.
724 *
725 * Since: 1.0
726 */
727static inline void
729 float depth)
730{
731#if 1
732 const float m_xw = graphene_simd4f_get_w (m->x);
733 const float m_yw = graphene_simd4f_get_w (m->y);
734 const float m_zw = graphene_simd4f_get_w (m->z);
735 const float m_ww = graphene_simd4f_get_w (m->w);
736
737 const float p0 = graphene_simd4f_get_z (m->x) + -1.0f / depth * m_xw;
738 const float p1 = graphene_simd4f_get_z (m->y) + -1.0f / depth * m_yw;
739 const float p2 = graphene_simd4f_get_z (m->z) + -1.0f / depth * m_zw;
740 const float p3 = graphene_simd4f_get_z (m->w) + -1.0f / depth * m_ww;
741
742 const graphene_simd4f_t p_x = graphene_simd4f_merge_w (m->x, m_xw + p0);
743 const graphene_simd4f_t p_y = graphene_simd4f_merge_w (m->y, m_yw + p1);
744 const graphene_simd4f_t p_z = graphene_simd4f_merge_w (m->z, m_zw + p2);
745 const graphene_simd4f_t p_w = graphene_simd4f_merge_w (m->w, m_ww + p3);
746#else
747 /* this is equivalent to the operations above, but trying to inline
748 * them into SIMD registers as much as possible by transposing the
749 * original matrix and operating on the resulting column vectors. it
750 * should warrant a micro benchmark, because while the above code is
751 * dominated by single channel reads, the code below has a transpose
752 * operation.
753 */
755 const graphene_simd4f_t f, p;
756 const graphene_simd4f_t p_x, p_y, p_z, p_w;
757
759
766#endif
767
768 *m = graphene_simd4x4f_init (p_x, p_y, p_z, p_w);
769}
770
771/**
772 * graphene_simd4x4f_translation:
773 * @m: a #graphene_simd4x4f_t
774 * @x: coordinate of the X translation
775 * @y: coordinate of the Y translation
776 * @z: coordinate of the Z translation
777 *
778 * Initializes @m to contain a translation to the given coordinates.
779 *
780 * Since: 1.0
781 */
782static inline void
784 float x,
785 float y,
786 float z)
787{
788 *m = graphene_simd4x4f_init (graphene_simd4f_init (1.0f, 0.0f, 0.0f, 0.0f),
789 graphene_simd4f_init (0.0f, 1.0f, 0.0f, 0.0f),
790 graphene_simd4f_init (0.0f, 0.0f, 1.0f, 0.0f),
791 graphene_simd4f_init ( x, y, z, 1.0f));
792}
793
794/**
795 * graphene_simd4x4f_scale:
796 * @m: a #graphene_simd4x4f_t
797 * @x: scaling factor on the X axis
798 * @y: scaling factor on the Y axis
799 * @z: scaling factor on the Z axis
800 *
801 * Initializes @m to contain a scaling transformation with the
802 * given factors.
803 *
804 * Since: 1.0
805 */
806static inline void
808 float x,
809 float y,
810 float z)
811{
812 *m = graphene_simd4x4f_init (graphene_simd4f_init ( x, 0.0f, 0.0f, 0.0f),
813 graphene_simd4f_init (0.0f, y, 0.0f, 0.0f),
814 graphene_simd4f_init (0.0f, 0.0f, z, 0.0f),
815 graphene_simd4f_init (0.0f, 0.0f, 0.0f, 1.0f));
816
817}
818
819/**
820 * graphene_simd4x4f_rotation:
821 * @m: a #graphene_simd4x4f_t
822 * @rad: the rotation, in radians
823 * @axis: the vector of the axis of rotation
824 *
825 * Initializes @m to contain a rotation of the given angle
826 * around the given axis.
827 *
828 * Since: 1.0
829 */
830static inline void
832 float rad,
834{
835 float sine, cosine;
836 float x, y, z;
837 float ab, bc, ca;
838 float tx, ty, tz;
839 graphene_simd4f_t i, j, k;
840
841 rad = -rad;
842 axis = graphene_simd4f_normalize3 (axis);
843
844 /* We cannot use graphene_sincos() because it's a private function, whereas
845 * graphene-simd4x4f.h is a public header
846 */
847 sine = sinf (rad);
848 cosine = cosf (rad);
849
850 x = graphene_simd4f_get_x (axis);
851 y = graphene_simd4f_get_y (axis);
852 z = graphene_simd4f_get_z (axis);
853
854 ab = x * y * (1.0f - cosine);
855 bc = y * z * (1.0f - cosine);
856 ca = z * x * (1.0f - cosine);
857
858 tx = x * x;
859 ty = y * y;
860 tz = z * z;
861
862 i = graphene_simd4f_init (tx + cosine * (1.0f - tx), ab - z * sine, ca + y * sine, 0.f);
863 j = graphene_simd4f_init (ab + z * sine, ty + cosine * (1.0f - ty), bc - x * sine, 0.f);
864 k = graphene_simd4f_init (ca - y * sine, bc + x * sine, tz + cosine * (1.0f - tz), 0.f);
865
866 *m = graphene_simd4x4f_init (i, j, k, graphene_simd4f_init (0.0f, 0.0f, 0.0f, 1.0f));
867}
868
869/**
870 * graphene_simd4x4f_add:
871 * @a: a #graphene_simd4x4f_t
872 * @b: a #graphene_simd4x4f_t
873 * @res: (out caller-allocates): return location for a #graphene_simd4x4f_t
874 *
875 * Adds each row vector of @a and @b and places the results in @res.
876 *
877 * Since: 1.0
878 */
879static inline void
881 const graphene_simd4x4f_t *b,
883{
884 res->x = graphene_simd4f_add (a->x, b->x);
885 res->y = graphene_simd4f_add (a->y, b->y);
886 res->z = graphene_simd4f_add (a->z, b->z);
887 res->w = graphene_simd4f_add (a->w, b->w);
888}
889
890/**
891 * graphene_simd4x4f_sub:
892 * @a: a #graphene_simd4x4f_t
893 * @b: a #graphene_simd4x4f_t
894 * @res: (out caller-allocates): return location for a #graphene_simd4x4f_t
895 *
896 * Subtracts each row vector of @b from the corresponding row vector of @a and places the results in @res.
897 *
898 * Since: 1.0
899 */
900static inline void
902 const graphene_simd4x4f_t *b,
904{
905 res->x = graphene_simd4f_sub (a->x, b->x);
906 res->y = graphene_simd4f_sub (a->y, b->y);
907 res->z = graphene_simd4f_sub (a->z, b->z);
908 res->w = graphene_simd4f_sub (a->w, b->w);
909}
910
911/**
912 * graphene_simd4x4f_mul:
913 * @a: a #graphene_simd4x4f_t
914 * @b: a #graphene_simd4x4f_t
915 * @res: (out caller-allocates): return location for a #graphene_simd4x4f_t
916 *
917 * Multiplies each row vector of @a and @b and places the results in @res.
918 *
919 * You most likely want graphene_simd4x4f_matrix_mul() instead.
920 *
921 * Since: 1.0
922 */
923static inline void
925 const graphene_simd4x4f_t *b,
927{
928 res->x = graphene_simd4f_mul (a->x, b->x);
929 res->y = graphene_simd4f_mul (a->y, b->y);
930 res->z = graphene_simd4f_mul (a->z, b->z);
931 res->w = graphene_simd4f_mul (a->w, b->w);
932}
933
934/**
935 * graphene_simd4x4f_div:
936 * @a: a #graphene_simd4x4f_t
937 * @b: a #graphene_simd4x4f_t
938 * @res: (out caller-allocates): return location for a #graphene_simd4x4f_t
939 *
940 * Divides each row vector of @a by the corresponding row vector of @b and places the results in @res.
941 *
942 * Since: 1.0
943 */
944static inline void
946 const graphene_simd4x4f_t *b,
948{
949 res->x = graphene_simd4f_div (a->x, b->x);
950 res->y = graphene_simd4f_div (a->y, b->y);
951 res->z = graphene_simd4f_div (a->z, b->z);
952 res->w = graphene_simd4f_div (a->w, b->w);
953}
954
955/**
956 * graphene_simd4x4f_inverse:
957 * @m: a #graphene_simd4x4f_t
958 * @res: (out): return location for the inverse matrix
959 *
960 * Inverts the given #graphene_simd4x4f_t.
961 *
962 * Returns: `true` if the matrix was invertible
963 *
964 * Since: 1.0
965 */
966static inline bool
969{
970 /* split rows */
971 const graphene_simd4f_t r0 = m->x;
972 const graphene_simd4f_t r1 = m->y;
973 const graphene_simd4f_t r2 = m->z;
974 const graphene_simd4f_t r3 = m->w;
975
976 /* cofactors */
980
984
988
992
993 const graphene_simd4f_t r0_wxyz_x_r1 = graphene_simd4f_mul (r0_wxyz, r1);
994 const graphene_simd4f_t r0_wxyz_x_r1_yzwx = graphene_simd4f_mul (r0_wxyz, r1_yzwx);
995 const graphene_simd4f_t r0_wxyz_x_r1_zwxy = graphene_simd4f_mul (r0_wxyz, r1_zwxy);
996
997 const graphene_simd4f_t r2_wxyz_x_r3 = graphene_simd4f_mul (r2_wxyz, r3);
998 const graphene_simd4f_t r2_wxyz_x_r3_yzwx = graphene_simd4f_mul (r2_wxyz, r3_yzwx);
999 const graphene_simd4f_t r2_wxyz_x_r3_zwxy = graphene_simd4f_mul (r2_wxyz, r3_zwxy);
1000
1002 graphene_simd4f_shuffle_zwxy (r2_wxyz_x_r3));
1004 r2_wxyz_x_r3_yzwx);
1005 const graphene_simd4f_t ar3 = graphene_simd4f_sub (r2_wxyz_x_r3_zwxy,
1006 graphene_simd4f_shuffle_wxyz (r2_wxyz_x_r3));
1007
1009 graphene_simd4f_shuffle_zwxy (r0_wxyz_x_r1));
1011 r0_wxyz_x_r1_yzwx);
1012 const graphene_simd4f_t br3 = graphene_simd4f_sub (r0_wxyz_x_r1_zwxy,
1013 graphene_simd4f_shuffle_wxyz (r0_wxyz_x_r1));
1014
1015 const graphene_simd4f_t r0_sum =
1016 graphene_simd4f_madd (r0_yzwx, ar3,
1017 graphene_simd4f_madd (r0_zwxy, ar2,
1018 graphene_simd4f_mul (r0_wxyz, ar1)));
1019 const graphene_simd4f_t r1_sum =
1020 graphene_simd4f_madd (r1_wxyz, ar1,
1021 graphene_simd4f_madd (r1_zwxy, ar2,
1022 graphene_simd4f_mul (r1_yzwx, ar3)));
1023 const graphene_simd4f_t r2_sum =
1024 graphene_simd4f_madd (r2_yzwx, br3,
1025 graphene_simd4f_madd (r2_zwxy, br2,
1026 graphene_simd4f_mul (r2_wxyz, br1)));
1027 const graphene_simd4f_t r3_sum =
1028 graphene_simd4f_madd (r3_yzwx, br3,
1029 graphene_simd4f_madd (r3_zwxy, br2,
1030 graphene_simd4f_mul (r3_wxyz, br1)));
1031
1032 /* determinant and its inverse */
1033 const graphene_simd4f_t d0 = graphene_simd4f_mul (r1_sum, r0);
1036 if (fabsf (graphene_simd4f_get_x (det)) >= FLT_EPSILON)
1037 {
1039
1044
1045 graphene_simd4x4f_t mt = graphene_simd4x4f_init (o0, o1, o2, o3);
1046
1047 /* transpose the resulting matrix */
1048 graphene_simd4x4f_transpose (&mt, res);
1049
1050 return true;
1051 }
1052
1053 return false;
1054}
1055
1056/**
1057 * graphene_simd4x4f_determinant:
1058 * @m: a #graphene_simd4x4f_t
1059 * @det_r: (out): return location for the matrix determinant
1060 * @invdet_r: (out): return location for the inverse of the matrix
1061 * determinant
1062 *
1063 * Computes the determinant of the given matrix, and the inverse (reciprocal) of that determinant.
1064 *
1065 * Since: 1.0
1066 */
1067static inline void
1069 graphene_simd4f_t *det_r,
1070 graphene_simd4f_t *invdet_r)
1071{
1072 /* split rows */
1073 const graphene_simd4f_t r0 = m->x;
1074 const graphene_simd4f_t r1 = m->y;
1075 const graphene_simd4f_t r2 = m->z;
1076 const graphene_simd4f_t r3 = m->w;
1077
1078 /* cofactors */
1082
1084
1087
1088 const graphene_simd4f_t r2_wxyz_x_r3 = graphene_simd4f_mul (r2_wxyz, r3);
1089 const graphene_simd4f_t r2_wxyz_x_r3_yzwx = graphene_simd4f_mul (r2_wxyz, r3_yzwx);
1090 const graphene_simd4f_t r2_wxyz_x_r3_zwxy = graphene_simd4f_mul (r2_wxyz, r3_zwxy);
1091
1093 graphene_simd4f_shuffle_zwxy (r2_wxyz_x_r3));
1095 r2_wxyz_x_r3_yzwx);
1096 const graphene_simd4f_t ar3 = graphene_simd4f_sub (r2_wxyz_x_r3_zwxy,
1097 graphene_simd4f_shuffle_wxyz (r2_wxyz_x_r3));
1098
1099 const graphene_simd4f_t r1_sum =
1100 graphene_simd4f_madd (r1_wxyz, ar1,
1101 graphene_simd4f_madd (r1_zwxy, ar2,
1102 graphene_simd4f_mul (r1_yzwx, ar3)));
1103
1104 /* determinant and its inverse */
1105 const graphene_simd4f_t d0 = graphene_simd4f_mul (r1_sum, r0);
1107
1109
1111
1112 if (det_r != NULL)
1113 *det_r = det;
1114
1115 if (invdet_r != NULL)
1116 *invdet_r = invdet;
1117}
1118
1119/**
1120 * graphene_simd4x4f_is_identity:
1121 * @m: a #graphene_simd4x4f_t
1122 *
1123 * Checks whether the given matrix is the identity matrix.
1124 *
1125 * Returns: `true` if the matrix is the identity matrix
1126 *
1127 * Since: 1.0
1128 */
1129static inline bool
1131{
1132 const graphene_simd4f_t r0 = graphene_simd4f_init (1.0f, 0.0f, 0.0f, 0.0f);
1133 const graphene_simd4f_t r1 = graphene_simd4f_init (0.0f, 1.0f, 0.0f, 0.0f);
1134 const graphene_simd4f_t r2 = graphene_simd4f_init (0.0f, 0.0f, 1.0f, 0.0f);
1135 const graphene_simd4f_t r3 = graphene_simd4f_init (0.0f, 0.0f, 0.0f, 1.0f);
1136
1137 return graphene_simd4f_cmp_eq (m->x, r0) &&
1138 graphene_simd4f_cmp_eq (m->y, r1) &&
1139 graphene_simd4f_cmp_eq (m->z, r2) &&
1140 graphene_simd4f_cmp_eq (m->w, r3);
1141}
1142
1143/**
1144 * graphene_simd4x4f_is_2d:
1145 * @m: a #graphene_simd4x4f_t
1146 *
1147 * Checks whether the given matrix is compatible with an affine
1148 * transformation matrix.
1149 *
1150 * Returns: `true` if the matrix is compatible with an affine
1151 * transformation matrix
1152 *
1153 * Since: 1.0
1154 */
1155static inline bool
1157{
1158 float f[4];
1159
1160 if (!(fabsf (graphene_simd4f_get_z (m->x)) < FLT_EPSILON && fabsf (graphene_simd4f_get_w (m->x)) < FLT_EPSILON))
1161 return false;
1162
1163 if (!(fabsf (graphene_simd4f_get_z (m->y)) < FLT_EPSILON && fabsf (graphene_simd4f_get_w (m->y)) < FLT_EPSILON))
1164 return false;
1165
1166 graphene_simd4f_dup_4f (m->z, f);
1167 if (!(fabsf (f[0]) < FLT_EPSILON &&
1168 fabsf (f[1]) < FLT_EPSILON &&
1169 1.f - fabsf (f[2]) < FLT_EPSILON &&
1170 fabsf (f[3]) < FLT_EPSILON))
1171 return false;
1172
1173 if (!(fabsf (graphene_simd4f_get_z (m->w)) < FLT_EPSILON && 1.f - fabsf (graphene_simd4f_get_w (m->w)) < FLT_EPSILON))
1174 return false;
1175
1176 return true;
1177}
1178
guint depth
#define NULL
Definition gmacros.h:924
#define GRAPHENE_END_DECLS
#define GRAPHENE_VECTORCALL
#define GRAPHENE_PI_2
#define GRAPHENE_BEGIN_DECLS
#define graphene_simd4f_get_y(s)
#define graphene_simd4f_shuffle_wxyz(s)
#define graphene_simd4f_cmp_eq(a, b)
static graphene_simd4f_t graphene_simd4f_normalize3(const graphene_simd4f_t v)
#define graphene_simd4f_zero_w(v)
#define graphene_simd4f_neg(s)
#define graphene_simd4f_mul(a, b)
#define graphene_simd4f_shuffle_zwxy(s)
#define graphene_simd4f_merge_w(s, v)
static float graphene_simd4f_sum_scalar(const graphene_simd4f_t v)
#define graphene_simd4f_splat_z(s)
#define graphene_simd4f_flip_sign_1010(s)
#define graphene_simd4f_dup_4f(s, v)
#define graphene_simd4f_splat_y(s)
#define graphene_simd4f_merge_high(a, b)
#define graphene_simd4f_get_x(s)
#define graphene_simd4f_init_4f(v)
#define graphene_simd4f_get_z(s)
#define graphene_simd4f_splat_x(s)
static graphene_simd4f_t graphene_simd4f_sum(const graphene_simd4f_t v)
#define graphene_simd4f_dot3(a, b)
#define graphene_simd4f_init(x, y, z, w)
#define graphene_simd4f_splat(v)
static graphene_simd4f_t graphene_simd4f_madd(const graphene_simd4f_t m1, const graphene_simd4f_t m2, const graphene_simd4f_t a)
#define graphene_simd4f_flip_sign_0101(s)
#define graphene_simd4f_get_w(s)
#define graphene_simd4f_reciprocal(s)
#define graphene_simd4f_div(a, b)
#define graphene_simd4f_shuffle_yzwx(s)
#define graphene_simd4f_sub(a, b)
#define graphene_simd4f_splat_w(s)
#define graphene_simd4f_cross3(a, b)
#define graphene_simd4f_add(a, b)
static void graphene_simd4x4f_inv_ortho_vec3_mul(const graphene_simd4x4f_t *a, const graphene_simd4f_t *b, graphene_simd4f_t *res)
static void graphene_simd4x4f_to_float(const graphene_simd4x4f_t *m, float *v)
static void graphene_simd4x4f_div(const graphene_simd4x4f_t *a, const graphene_simd4x4f_t *b, graphene_simd4x4f_t *res)
static void graphene_simd4x4f_determinant(const graphene_simd4x4f_t *m, graphene_simd4f_t *det_r, graphene_simd4f_t *invdet_r)
static void graphene_simd4x4f_init_ortho(graphene_simd4x4f_t *m, float left, float right, float bottom, float top, float z_near, float z_far)
static void graphene_simd4x4f_init_frustum(graphene_simd4x4f_t *m, float left, float right, float bottom, float top, float z_near, float z_far)
static void graphene_simd4x4f_sub(const graphene_simd4x4f_t *a, const graphene_simd4x4f_t *b, graphene_simd4x4f_t *res)
static void graphene_simd4x4f_sum(const graphene_simd4x4f_t *a, graphene_simd4f_t *res)
static bool graphene_simd4x4f_inverse(const graphene_simd4x4f_t *m, graphene_simd4x4f_t *res)
static void graphene_simd4x4f_mul(const graphene_simd4x4f_t *a, const graphene_simd4x4f_t *b, graphene_simd4x4f_t *res)
static void graphene_simd4x4f_perspective(graphene_simd4x4f_t *m, float depth)
static GRAPHENE_BEGIN_DECLS graphene_simd4x4f_t GRAPHENE_VECTORCALL graphene_simd4x4f_init(graphene_simd4f_t x, graphene_simd4f_t y, graphene_simd4f_t z, graphene_simd4f_t w)
static void graphene_simd4x4f_init_perspective(graphene_simd4x4f_t *m, float fovy_rad, float aspect, float z_near, float z_far)
static void graphene_simd4x4f_vec3_mul(const graphene_simd4x4f_t *m, const graphene_simd4f_t *v, graphene_simd4f_t *res)
static bool graphene_simd4x4f_is_identity(const graphene_simd4x4f_t *m)
static void graphene_simd4x4f_transpose(const graphene_simd4x4f_t *s, graphene_simd4x4f_t *res)
#define graphene_simd4x4f_transpose_in_place(s)
static void graphene_simd4x4f_matrix_mul(const graphene_simd4x4f_t *a, const graphene_simd4x4f_t *b, graphene_simd4x4f_t *res)
static void graphene_simd4x4f_init_look_at(graphene_simd4x4f_t *m, graphene_simd4f_t eye, graphene_simd4f_t center, graphene_simd4f_t up)
static void graphene_simd4x4f_point3_mul(const graphene_simd4x4f_t *m, const graphene_simd4f_t *p, graphene_simd4f_t *res)
static void graphene_simd4x4f_translation(graphene_simd4x4f_t *m, float x, float y, float z)
static void graphene_simd4x4f_init_from_float(graphene_simd4x4f_t *m, const float *f)
static void graphene_simd4x4f_rotation(graphene_simd4x4f_t *m, float rad, graphene_simd4f_t axis)
static void graphene_simd4x4f_vec4_mul(const graphene_simd4x4f_t *a, const graphene_simd4f_t *b, graphene_simd4f_t *res)
static bool graphene_simd4x4f_is_2d(const graphene_simd4x4f_t *m)
static void graphene_simd4x4f_add(const graphene_simd4x4f_t *a, const graphene_simd4x4f_t *b, graphene_simd4x4f_t *res)
static void graphene_simd4x4f_scale(graphene_simd4x4f_t *m, float x, float y, float z)
static void graphene_simd4x4f_inv_ortho_point3_mul(const graphene_simd4x4f_t *a, const graphene_simd4f_t *b, graphene_simd4f_t *res)
static void graphene_simd4x4f_init_identity(graphene_simd4x4f_t *m)
#define GRAPHENE_AVAILABLE_IN_1_0
lu_byte right
lu_byte left
CURL_EXTERN CURLMcode curl_socket_t s
Definition multi.h:318
graphene_simd4f_t w
graphene_simd4f_t z
graphene_simd4f_t y
graphene_simd4f_t x