/* Origin: root/libs/ode-0.16.1/ode/src/fastdot_impl.h (blob f32e7176eeaa0a73bf286f2c308ec8f804129441) */


/*************************************************************************
 *                                                                       *
 * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
 * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
 *                                                                       *
 * This library is free software; you can redistribute it and/or         *
 * modify it under the terms of EITHER:                                  *
 *   (1) The GNU Lesser General Public License as published by the Free  *
 *       Software Foundation; either version 2.1 of the License, or (at  *
 *       your option) any later version. The text of the GNU Lesser      *
 *       General Public License is included with this library in the     *
 *       file LICENSE.TXT.                                               *
 *   (2) The BSD-style license that is included with this library in     *
 *       the file LICENSE-BSD.TXT.                                       *
 *                                                                       *
 * This library is distributed in the hope that it will be useful,       *
 * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
 * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
 *                                                                       *
 *************************************************************************/

#ifndef _ODE_FASTDOT_IMPL_H_
#define _ODE_FASTDOT_IMPL_H_


/*
 * Dot product of a contiguous vector with a strided one.
 *
 * Returns the sum over i in [0, n) of a[i] * b[i * b_stride].
 *
 *   b_stride - compile-time distance, in dReal elements, between
 *              consecutive entries of b.
 *   a        - first operand; n consecutive elements are read.
 *   b        - second operand; n elements are read, b_stride apart.
 *   n        - number of element pairs to multiply.
 *
 * The bulk of the input is consumed four pairs per iteration, the four
 * products of a group being added together before they reach the running
 * total; the trailing (n mod 4) pairs are then accumulated one at a time.
 */
template<unsigned b_stride>
dReal calculateLargeVectorDot (const dReal *a, const dReal *b, unsigned n)
{
    /* Split n into a multiple-of-four part and a 0..3 remainder. */
    const unsigned leftover = n & 3u;
    const dReal *const groupedEnd = a + (n - leftover);

    dReal total = 0;

    /* Main pass: four element pairs per step. */
    while (a != groupedEnd) {
        const dReal prod0 = a[0] * b[0];
        const dReal prod1 = a[1] * b[b_stride];
        const dReal prod2 = a[2] * b[2 * b_stride];
        const dReal prod3 = a[3] * b[3 * b_stride];
        total += prod0 + prod1 + prod2 + prod3;

        a += 4;
        b += 4 * b_stride;
    }

    /* Tail pass: whatever did not fill a complete group of four. */
    const dReal *const vectorEnd = groupedEnd + leftover;
    while (a != vectorEnd) {
        total += a[0] * b[0];

        ++a;
        b += b_stride;
    }

    return total;
}


#endif