author    sanine <sanine.not@pm.me>  2022-10-01 20:59:36 -0500
committer sanine <sanine.not@pm.me>  2022-10-01 20:59:36 -0500
commit    c5fc66ee58f2c60f2d226868bb1cf5b91badaf53 (patch)
tree      277dd280daf10bf77013236b8edfa5f88708c7e0 /libs/ode-0.16.1/ode/src/collision_sapspace.cpp
parent    1cf9cc3408af7008451f9133fb95af66a9697d15 (diff)
add ode
Diffstat (limited to 'libs/ode-0.16.1/ode/src/collision_sapspace.cpp')
-rw-r--r--  libs/ode-0.16.1/ode/src/collision_sapspace.cpp | 853
1 file changed, 853 insertions(+), 0 deletions(-)
diff --git a/libs/ode-0.16.1/ode/src/collision_sapspace.cpp b/libs/ode-0.16.1/ode/src/collision_sapspace.cpp
new file mode 100644
index 0000000..76258bf
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/collision_sapspace.cpp
@@ -0,0 +1,853 @@
+/*************************************************************************
+ * *
+ * Open Dynamics Engine, Copyright (C) 2001-2003 Russell L. Smith. *
+ * All rights reserved. Email: russ@q12.org Web: www.q12.org *
+ * *
+ * This library is free software; you can redistribute it and/or *
+ * modify it under the terms of EITHER: *
+ * (1) The GNU Lesser General Public License as published by the Free *
+ * Software Foundation; either version 2.1 of the License, or (at *
+ * your option) any later version. The text of the GNU Lesser *
+ * General Public License is included with this library in the *
+ * file LICENSE.TXT. *
+ * (2) The BSD-style license that is included with this library in *
+ * the file LICENSE-BSD.TXT. *
+ * *
+ * This library is distributed in the hope that it will be useful, *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details. *
+ * *
+ *************************************************************************/
+
+/*
+ * Sweep and Prune adaptation/tweaks for ODE by Aras Pranckevicius.
+ * Additional work by David Walters
+ * Original code:
+ * OPCODE - Optimized Collision Detection
+ * Copyright (C) 2001 Pierre Terdiman
+ * Homepage: http://www.codercorner.com/Opcode.htm
+ *
+ * This version does a complete radix sort each step rather than the
+ * "classical" incremental SAP, so there is no temporal coherence at the
+ * sweep level, but any movement velocities are handled equally well.
+ */
+
+#include <ode/common.h>
+#include <ode/collision_space.h>
+#include <ode/collision.h>
+
+#include "config.h"
+#include "matrix.h"
+#include "collision_kernel.h"
+#include "collision_space_internal.h"
+
+// Reference counting helpers for the former global radix sort data.
+// Unused in this version: each SAP space owns its own sort context.
+//static void RadixSortRef();
+//static void RadixSortDeref();
+
+
+// --------------------------------------------------------------------------
+// Radix Sort Context
+// --------------------------------------------------------------------------
+
+struct RadixSortContext
+{
+public:
+ RadixSortContext(): mCurrentSize(0), mCurrentUtilization(0), mRanksValid(false), mRanksBuffer(NULL), mPrimaryRanks(NULL) {}
+ ~RadixSortContext() { FreeRanks(); }
+
+ // OPCODE's Radix Sorting, returns a list of indices in sorted order
+ const uint32* RadixSort( const float* input2, uint32 nb );
+
+private:
+ void FreeRanks();
+ void AllocateRanks(sizeint nNewSize);
+
+ void ReallocateRanksIfNecessary(sizeint nNewSize);
+
+private:
+ void SetCurrentSize(sizeint nValue) { mCurrentSize = nValue; }
+ sizeint GetCurrentSize() const { return mCurrentSize; }
+
+ void SetCurrentUtilization(sizeint nValue) { mCurrentUtilization = nValue; }
+ sizeint GetCurrentUtilization() const { return mCurrentUtilization; }
+
+ uint32 *GetRanks1() const { return mPrimaryRanks; }
+ uint32 *GetRanks2() const { return mRanksBuffer + ((mRanksBuffer + mCurrentSize) - mPrimaryRanks); }
+ void SwapRanks() { mPrimaryRanks = GetRanks2(); }
+
+ bool AreRanksValid() const { return mRanksValid; }
+ void InvalidateRanks() { mRanksValid = false; }
+ void ValidateRanks() { mRanksValid = true; }
+
+private:
+ sizeint mCurrentSize; //!< Current size of the indices list
+ sizeint mCurrentUtilization; //!< Current utilization of the indices list
+ bool mRanksValid;
+ uint32* mRanksBuffer; //!< Two lists allocated sequentially in a single block
+ uint32* mPrimaryRanks;
+};
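+
+// Note on GetRanks2(): the two rank lists live in one allocation of
+// 2 * mCurrentSize entries. The expression
+//     mRanksBuffer + ((mRanksBuffer + mCurrentSize) - mPrimaryRanks)
+// toggles between the halves: when mPrimaryRanks points at the first half
+// the offset evaluates to mCurrentSize (the second half); when it points at
+// the second half the offset evaluates to 0 (the first half). SwapRanks()
+// therefore ping-pongs the primary list between sorting passes.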
+
+void RadixSortContext::AllocateRanks(sizeint nNewSize)
+{
+ dIASSERT(GetCurrentSize() == 0);
+
+ mRanksBuffer = new uint32[2 * nNewSize];
+ mPrimaryRanks = mRanksBuffer;
+
+ SetCurrentSize(nNewSize);
+}
+
+void RadixSortContext::FreeRanks()
+{
+ SetCurrentSize(0);
+
+ delete[] mRanksBuffer;
+ mRanksBuffer = NULL; // guard against a stale pointer / double delete
+ mPrimaryRanks = NULL;
+}
+
+void RadixSortContext::ReallocateRanksIfNecessary(sizeint nNewSize)
+{
+ sizeint nCurUtilization = GetCurrentUtilization();
+
+ if (nNewSize != nCurUtilization)
+ {
+ sizeint nCurSize = GetCurrentSize();
+
+ if ( nNewSize > nCurSize )
+ {
+ // Free the previous buffer
+ FreeRanks();
+
+ // and allocate a fresh, larger one
+ AllocateRanks(nNewSize);
+ }
+
+ InvalidateRanks();
+ SetCurrentUtilization(nNewSize);
+ }
+}
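+
+// Note: ReallocateRanksIfNecessary() never shrinks the buffer; it only
+// grows it. Any change in utilization invalidates the ranks, which makes
+// the next RadixSort() call take the "ranks invalid" (sequential) path once
+// before temporal coherence kicks back in.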
+
+// --------------------------------------------------------------------------
+// SAP space code
+// --------------------------------------------------------------------------
+
+struct dxSAPSpace : public dxSpace
+{
+ // Constructor / Destructor
+ dxSAPSpace( dSpaceID _space, int sortaxis );
+ ~dxSAPSpace();
+
+ // dxSpace
+ virtual dxGeom* getGeom(int i);
+ virtual void add(dxGeom* g);
+ virtual void remove(dxGeom* g);
+ virtual void dirty(dxGeom* g);
+ virtual void computeAABB();
+ virtual void cleanGeoms();
+ virtual void collide( void *data, dNearCallback *callback );
+ virtual void collide2( void *data, dxGeom *geom, dNearCallback *callback );
+
+private:
+
+ //--------------------------------------------------------------------------
+ // Local Declarations
+ //--------------------------------------------------------------------------
+
+ //! A generic couple structure
+ struct Pair
+ {
+ uint32 id0; //!< First index of the pair
+ uint32 id1; //!< Second index of the pair
+
+ // Default and Value Constructor
+ Pair() {}
+ Pair( uint32 i0, uint32 i1 ) : id0( i0 ), id1( i1 ) {}
+ };
+
+ //--------------------------------------------------------------------------
+ // Helpers
+ //--------------------------------------------------------------------------
+
+ /**
+ * Complete box pruning.
+ * Returns a list of overlapping pairs of boxes, each box of the pair
+ * belongs to the same set.
+ *
+ * @param count [in] number of boxes.
+ * @param geoms [in] geoms of boxes.
+ * @param pairs [out] array of overlapping pairs.
+ */
+ void BoxPruning( int count, const dxGeom** geoms, dArray< Pair >& pairs );
+
+
+ //--------------------------------------------------------------------------
+ // Implementation Data
+ //--------------------------------------------------------------------------
+
+ // We have two lists (arrays of pointers) of dirty and clean
+ // geoms. Each geom knows its index into the corresponding list
+ // (see the GEOM_*_IDX macros below).
+ dArray<dxGeom*> DirtyList; // dirty geoms
+ dArray<dxGeom*> GeomList; // clean geoms
+
+ // For SAP, we ultimately separate "normal" geoms and the ones that have
+ // infinite AABBs. No point doing SAP on infinite ones (and it doesn't handle
+ // infinite geoms anyway).
+ dArray<dxGeom*> TmpGeomList; // temporary for normal geoms
+ dArray<dxGeom*> TmpInfGeomList; // temporary for geoms with infinite AABBs
+
+ // Our sorting axes (X,Z,Y is often best). Stored pre-multiplied by 2 for a minor speedup.
+ // Axis indices into geom's aabb are: min=idx, max=idx+1
+ uint32 ax0idx;
+ uint32 ax1idx;
+ uint32 ax2idx;
+
+ // pruning position array scratch pad
+ // NOTE: this is float not dReal because of the OPCODE radix sorter
+ dArray< float > poslist;
+ RadixSortContext sortContext;
+};
+
+// Creation
+dSpaceID dSweepAndPruneSpaceCreate( dxSpace* space, int axisorder ) {
+ return new dxSAPSpace( space, axisorder );
+}
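+
+// Illustrative usage sketch (not part of this file): a SAP space is created
+// and used like any other ODE space. The axis-order constants such as
+// dSAP_AXES_XZY come from ode/collision_space.h, and nearCallback stands for
+// a user-supplied dNearCallback.
+//
+//   dSpaceID sap = dSweepAndPruneSpaceCreate( 0, dSAP_AXES_XZY );
+//   dGeomID box = dCreateBox( sap, 1, 1, 1 );     // registered via add()
+//   dSpaceCollide( sap, userData, &nearCallback );
+//   dSpaceDestroy( sap );  // honours the 'cleanup' flag for contained geoms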
+
+
+//==============================================================================
+
+#define GEOM_ENABLED(g) (((g)->gflags & GEOM_ENABLE_TEST_MASK) == GEOM_ENABLE_TEST_VALUE)
+
+// HACK: We abuse the 'next_ex' and 'tome_ex' members of dxGeom to store indices into the dirty/geom lists.
+#define GEOM_SET_DIRTY_IDX(g,idx) { (g)->next_ex = (dxGeom*)(sizeint)(idx); }
+#define GEOM_SET_GEOM_IDX(g,idx) { (g)->tome_ex = (dxGeom**)(sizeint)(idx); }
+#define GEOM_GET_DIRTY_IDX(g) ((int)(sizeint)(g)->next_ex)
+#define GEOM_GET_GEOM_IDX(g) ((int)(sizeint)(g)->tome_ex)
+#define GEOM_INVALID_IDX (-1)
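+// The casts above round-trip an int through a pointer-size integer: e.g.
+// GEOM_SET_GEOM_IDX(g,7) stores (dxGeom**)7 in g->tome_ex, and
+// GEOM_GET_GEOM_IDX(g) recovers 7; GEOM_INVALID_IDX marks "not in this list".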
+
+
+/*
+ * A bit of repetitive work - similar to collideAABBs, but doesn't check
+ * if AABBs intersect (because SAP returns pairs with overlapping AABBs).
+ */
+static void collideGeomsNoAABBs( dxGeom *g1, dxGeom *g2, void *data, dNearCallback *callback )
+{
+ dIASSERT( (g1->gflags & GEOM_AABB_BAD)==0 );
+ dIASSERT( (g2->gflags & GEOM_AABB_BAD)==0 );
+
+ // no contacts if both geoms on the same body, and the body is not 0
+ if (g1->body == g2->body && g1->body) return;
+
+ // test if the category and collide bitfields match
+ if ( ((g1->category_bits & g2->collide_bits) ||
+ (g2->category_bits & g1->collide_bits)) == 0) {
+ return;
+ }
+
+ dReal *bounds1 = g1->aabb;
+ dReal *bounds2 = g2->aabb;
+
+ // check if either object is able to prove that it doesn't intersect the
+ // AABB of the other
+ if (g1->AABBTest (g2,bounds2) == 0) return;
+ if (g2->AABBTest (g1,bounds1) == 0) return;
+
+ // the objects might actually intersect - call the space callback function
+ callback (data,g1,g2);
+}
+
+
+dxSAPSpace::dxSAPSpace( dSpaceID _space, int axisorder ) : dxSpace( _space )
+{
+ type = dSweepAndPruneSpaceClass;
+
+ // Init AABB to infinity
+ aabb[0] = -dInfinity;
+ aabb[1] = dInfinity;
+ aabb[2] = -dInfinity;
+ aabb[3] = dInfinity;
+ aabb[4] = -dInfinity;
+ aabb[5] = dInfinity;
+
+ ax0idx = ( ( axisorder ) & 3 ) << 1;
+ ax1idx = ( ( axisorder >> 2 ) & 3 ) << 1;
+ ax2idx = ( ( axisorder >> 4 ) & 3 ) << 1;
+}
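+
+// Worked example of the decoding above, assuming ODE's usual encoding where
+// dSAP_AXES_XZY == ( (0) | (2<<2) | (1<<4) ) == 24:
+//   ax0idx = ( 24        & 3 ) << 1  ==  0  ->  aabb[0]/aabb[1] (X min/max)
+//   ax1idx = ( (24 >> 2) & 3 ) << 1  ==  4  ->  aabb[4]/aabb[5] (Z min/max)
+//   ax2idx = ( (24 >> 4) & 3 ) << 1  ==  2  ->  aabb[2]/aabb[3] (Y min/max)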
+
+dxSAPSpace::~dxSAPSpace()
+{
+ CHECK_NOT_LOCKED(this);
+ if ( cleanup ) {
+ // note that destroying each geom will call remove()
+ for ( ; DirtyList.size(); dGeomDestroy( DirtyList[ 0 ] ) ) {}
+ for ( ; GeomList.size(); dGeomDestroy( GeomList[ 0 ] ) ) {}
+ }
+ else {
+ // just unhook them
+ for ( ; DirtyList.size(); remove( DirtyList[ 0 ] ) ) {}
+ for ( ; GeomList.size(); remove( GeomList[ 0 ] ) ) {}
+ }
+}
+
+dxGeom* dxSAPSpace::getGeom( int i )
+{
+ dUASSERT( i >= 0 && i < count, "index out of range" );
+ int dirtySize = DirtyList.size();
+ if( i < dirtySize )
+ return DirtyList[i];
+ else
+ return GeomList[i-dirtySize];
+}
+
+void dxSAPSpace::add( dxGeom* g )
+{
+ CHECK_NOT_LOCKED (this);
+ dAASSERT(g);
+ dUASSERT(g->tome_ex == 0 && g->next_ex == 0, "geom is already in a space");
+
+ // add to dirty list
+ GEOM_SET_DIRTY_IDX( g, DirtyList.size() );
+ GEOM_SET_GEOM_IDX( g, GEOM_INVALID_IDX );
+ DirtyList.push( g );
+
+ dxSpace::add(g);
+}
+
+void dxSAPSpace::remove( dxGeom* g )
+{
+ CHECK_NOT_LOCKED(this);
+ dAASSERT(g);
+ dUASSERT(g->parent_space == this,"object is not in this space");
+
+ // remove
+ int dirtyIdx = GEOM_GET_DIRTY_IDX(g);
+ int geomIdx = GEOM_GET_GEOM_IDX(g);
+ // must be in one list, not in both
+ dUASSERT(
+ (dirtyIdx==GEOM_INVALID_IDX && geomIdx>=0 && geomIdx<GeomList.size()) ||
+ (geomIdx==GEOM_INVALID_IDX && dirtyIdx>=0 && dirtyIdx<DirtyList.size()),
+ "geom indices messed up" );
+ if( dirtyIdx != GEOM_INVALID_IDX ) {
+ // we're in dirty list, remove
+ int dirtySize = DirtyList.size();
+ if (dirtyIdx != dirtySize-1) {
+ dxGeom* lastG = DirtyList[dirtySize-1];
+ DirtyList[dirtyIdx] = lastG;
+ GEOM_SET_DIRTY_IDX(lastG,dirtyIdx);
+ }
+ GEOM_SET_DIRTY_IDX(g,GEOM_INVALID_IDX);
+ DirtyList.setSize( dirtySize-1 );
+ } else {
+ // we're in geom list, remove
+ int geomSize = GeomList.size();
+ if (geomIdx != geomSize-1) {
+ dxGeom* lastG = GeomList[geomSize-1];
+ GeomList[geomIdx] = lastG;
+ GEOM_SET_GEOM_IDX(lastG,geomIdx);
+ }
+ GEOM_SET_GEOM_IDX(g,GEOM_INVALID_IDX);
+ GeomList.setSize( geomSize-1 );
+ }
+
+ dxSpace::remove(g);
+}
+
+void dxSAPSpace::dirty( dxGeom* g )
+{
+ dAASSERT(g);
+ dUASSERT(g->parent_space == this, "object is not in this space");
+
+ // check if already dirtied
+ int dirtyIdx = GEOM_GET_DIRTY_IDX(g);
+ if( dirtyIdx != GEOM_INVALID_IDX )
+ return;
+
+ int geomIdx = GEOM_GET_GEOM_IDX(g);
+ dUASSERT( geomIdx>=0 && geomIdx<GeomList.size(), "geom indices messed up" );
+
+ // remove from geom list, place last in place of this
+ int geomSize = GeomList.size();
+ if (geomIdx != geomSize-1) {
+ dxGeom* lastG = GeomList[geomSize-1];
+ GeomList[geomIdx] = lastG;
+ GEOM_SET_GEOM_IDX(lastG,geomIdx);
+ }
+ GeomList.setSize( geomSize-1 );
+
+ // add to dirty list
+ GEOM_SET_GEOM_IDX( g, GEOM_INVALID_IDX );
+ GEOM_SET_DIRTY_IDX( g, DirtyList.size() );
+ DirtyList.push( g );
+}
+
+void dxSAPSpace::computeAABB()
+{
+ // TODO?
+}
+
+void dxSAPSpace::cleanGeoms()
+{
+ int dirtySize = DirtyList.size();
+ if( !dirtySize )
+ return;
+
+ // compute the AABBs of all dirty geoms, clear the dirty flags,
+ // remove from dirty list, place into geom list
+ lock_count++;
+
+ int geomSize = GeomList.size();
+ GeomList.setSize( geomSize + dirtySize ); // ensure space in geom list
+
+ for( int i = 0; i < dirtySize; ++i ) {
+ dxGeom* g = DirtyList[i];
+ if( IS_SPACE(g) ) {
+ ((dxSpace*)g)->cleanGeoms();
+ }
+
+ g->recomputeAABB();
+ dIASSERT((g->gflags & GEOM_AABB_BAD) == 0);
+
+ g->gflags &= ~GEOM_DIRTY;
+
+ // remove from dirty list, add to geom list
+ GEOM_SET_DIRTY_IDX( g, GEOM_INVALID_IDX );
+ GEOM_SET_GEOM_IDX( g, geomSize + i );
+ GeomList[geomSize+i] = g;
+ }
+ // clear dirty list
+ DirtyList.setSize( 0 );
+
+ lock_count--;
+}
+
+void dxSAPSpace::collide( void *data, dNearCallback *callback )
+{
+ dAASSERT (callback);
+
+ lock_count++;
+
+ cleanGeoms();
+
+ // by now all geoms are in GeomList, and DirtyList must be empty
+ int geom_count = GeomList.size();
+ dUASSERT( geom_count == count, "geom counts messed up" );
+
+ // separate all ENABLED geoms into infinite AABBs and normal AABBs
+ TmpGeomList.setSize(0);
+ TmpInfGeomList.setSize(0);
+ int axis0max = ax0idx + 1;
+ for( int i = 0; i < geom_count; ++i ) {
+ dxGeom* g = GeomList[i];
+ if( !GEOM_ENABLED(g) ) // skip disabled ones
+ continue;
+ const dReal& amax = g->aabb[axis0max];
+ if( amax == dInfinity ) // SAP can't handle infinite AABBs; sweep them separately below
+ TmpInfGeomList.push( g );
+ else
+ TmpGeomList.push( g );
+ }
+
+ // do SAP on normal AABBs
+ dArray< Pair > overlapBoxes;
+ int tmp_geom_count = TmpGeomList.size();
+ if ( tmp_geom_count > 0 )
+ {
+ // Generate a list of overlapping boxes
+ BoxPruning( tmp_geom_count, (const dxGeom**)TmpGeomList.data(), overlapBoxes );
+ }
+
+ // collide overlapping
+ int overlapCount = overlapBoxes.size();
+ for( int j = 0; j < overlapCount; ++j )
+ {
+ const Pair& pair = overlapBoxes[ j ];
+ dxGeom* g1 = TmpGeomList[ pair.id0 ];
+ dxGeom* g2 = TmpGeomList[ pair.id1 ];
+ collideGeomsNoAABBs( g1, g2, data, callback );
+ }
+
+ int infSize = TmpInfGeomList.size();
+ int normSize = TmpGeomList.size();
+ int m, n;
+
+ for ( m = 0; m < infSize; ++m )
+ {
+ dxGeom* g1 = TmpInfGeomList[ m ];
+
+ // collide infinite ones
+ for( n = m+1; n < infSize; ++n ) {
+ dxGeom* g2 = TmpInfGeomList[n];
+ collideGeomsNoAABBs( g1, g2, data, callback );
+ }
+
+ // collide infinite ones with normal ones
+ for( n = 0; n < normSize; ++n ) {
+ dxGeom* g2 = TmpGeomList[n];
+ collideGeomsNoAABBs( g1, g2, data, callback );
+ }
+ }
+
+ lock_count--;
+}
+
+void dxSAPSpace::collide2( void *data, dxGeom *geom, dNearCallback *callback )
+{
+ dAASSERT (geom && callback);
+
+ // TODO: This is just a simple N^2 implementation
+
+ lock_count++;
+
+ cleanGeoms();
+ geom->recomputeAABB();
+
+ // intersect bounding boxes
+ int geom_count = GeomList.size();
+ for ( int i = 0; i < geom_count; ++i ) {
+ dxGeom* g = GeomList[i];
+ if ( GEOM_ENABLED(g) )
+ collideAABBs (g,geom,data,callback);
+ }
+
+ lock_count--;
+}
+
+
+void dxSAPSpace::BoxPruning( int count, const dxGeom** geoms, dArray< Pair >& pairs )
+{
+ // Size the poslist
+ poslist.setSize( count );
+
+ // 1) Build main list using the primary axis
+ // NOTE: uses floats instead of dReals because that's what radix sort wants
+ for( int i = 0; i < count; ++i )
+ poslist[ i ] = (float)geoms[i]->aabb[ ax0idx ];
+
+ // 2) Sort the list
+ const uint32* Sorted = sortContext.RadixSort( poslist.data(), count );
+
+ // 3) Prune the list
+ const uint32* const LastSorted = Sorted + count;
+ const uint32* RunningAddress = Sorted;
+
+ bool bExitLoop;
+ Pair IndexPair;
+ while ( Sorted < LastSorted )
+ {
+ IndexPair.id0 = *Sorted++;
+
+ // advance RunningAddress to the first box whose minimum is not below the current one
+ for (bExitLoop = false; poslist[*RunningAddress++] < poslist[IndexPair.id0]; )
+ {
+ if (RunningAddress == LastSorted)
+ {
+ bExitLoop = true;
+ break;
+ }
+ }
+
+ if ( bExitLoop || RunningAddress == LastSorted) // Not a bug!!!
+ {
+ break;
+ }
+
+ const float idx0ax0max = (float)geoms[IndexPair.id0]->aabb[ax0idx+1]; // cast to float, just as at the start of the function, to avoid inconsistent comparisons caused by rounding
+ const dReal idx0ax1max = geoms[IndexPair.id0]->aabb[ax1idx+1];
+ const dReal idx0ax2max = geoms[IndexPair.id0]->aabb[ax2idx+1];
+
+ for (const uint32* RunningAddress2 = RunningAddress; poslist[ IndexPair.id1 = *RunningAddress2++ ] <= idx0ax0max; )
+ {
+ const dReal* aabb0 = geoms[ IndexPair.id0 ]->aabb;
+ const dReal* aabb1 = geoms[ IndexPair.id1 ]->aabb;
+
+ // Intersection?
+ if ( idx0ax1max >= aabb1[ax1idx] && aabb1[ax1idx+1] >= aabb0[ax1idx]
+ && idx0ax2max >= aabb1[ax2idx] && aabb1[ax2idx+1] >= aabb0[ax2idx] )
+ {
+ pairs.push( IndexPair );
+ }
+
+ if (RunningAddress2 == LastSorted)
+ {
+ break;
+ }
+ }
+
+ } // while ( Sorted < LastSorted )
+}
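+
+// Worked example of the pruning loop: take boxes whose primary-axis
+// intervals are A=[0,5], B=[2,3], C=[6,8]; sorted by minimum this gives
+// (A,B,C). For A, candidates are scanned while their minimum is <= A's
+// maximum (5): B qualifies (2 <= 5) and gets the full test on the other two
+// axes, while C terminates the scan (6 > 5). For B the scan starts at C and
+// stops at once (6 > 3). The sort thus rejects most pairs on one axis
+// before any full AABB comparison is made.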
+
+
+//==============================================================================
+
+//------------------------------------------------------------------------------
+// Radix Sort
+//------------------------------------------------------------------------------
+
+
+
+#define CHECK_PASS_VALIDITY(pass) \
+ /* Shortcut to current counters */ \
+ const uint32* CurCount = &mHistogram[pass<<8]; \
+ \
+ /* Reset flag. The sorting pass is supposed to be performed. (default) */ \
+ bool PerformPass = true; \
+ \
+ /* Check pass validity */ \
+ \
+ /* If all values have the same byte, sorting is useless. */ \
+ /* It may happen when sorting bytes or words instead of dwords. */ \
+ /* This routine actually sorts words faster than dwords, and bytes */ \
+ /* faster than words. Standard running time (O(4*n)) is reduced to O(2*n) */ \
+ /* for words and O(n) for bytes. Running time for floats depends on actual values... */ \
+ \
+ /* Get first byte */ \
+ uint8 UniqueVal = *(((const uint8*)input)+pass); \
+ \
+ /* Check that byte's counter */ \
+ if(CurCount[UniqueVal]==nb) PerformPass=false;
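+
+// Example of a skipped pass: if every input value lies in, say, [1.0, 2.0),
+// every float shares the same most significant (exponent) byte, 0x3F, so the
+// corresponding histogram counter equals nb and PerformPass is cleared.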
+
+// WARNING ONLY SORTS IEEE FLOATING-POINT VALUES
+const uint32* RadixSortContext::RadixSort( const float* input2, uint32 nb )
+{
+ union _type_cast_union
+ {
+ _type_cast_union(const float *floats): asFloats(floats) {}
+ _type_cast_union(const uint32 *uints32): asUInts32(uints32) {}
+
+ const float *asFloats;
+ const uint32 *asUInts32;
+ const uint8 *asUInts8;
+ };
+
+ const uint32* input = _type_cast_union(input2).asUInts32;
+
+ // Resize lists if needed
+ ReallocateRanksIfNecessary(nb);
+
+ // Allocate histograms & offsets on the stack
+ uint32 mHistogram[256*4];
+ uint32* mLink[256];
+
+ // Create histograms (counters). Counters for all passes are created in one run.
+ // Pros: read input buffer once instead of four times
+ // Cons: mHistogram is 4Kb instead of 1Kb
+ // Floating-point values are always supposed to be signed values, so there's only one code path there.
+ // Please note the floating point comparison needed for temporal coherence! Although the resulting asm code
+ // is dreadful, this is surprisingly not such a performance hit - well, I suppose that's a big one on first
+ // generation Pentiums... We can't compare the integer representations because, as Chris said, that just
+ // wouldn't work with mixed positive/negative values...
+ {
+ /* Clear counters/histograms */
+ memset(mHistogram, 0, 256*4*sizeof(uint32));
+
+ /* Prepare to count */
+ const uint8* p = _type_cast_union(input).asUInts8;
+ const uint8* pe = &p[nb*4];
+ uint32* h0= &mHistogram[0]; /* Histogram for first pass (LSB) */
+ uint32* h1= &mHistogram[256]; /* Histogram for second pass */
+ uint32* h2= &mHistogram[512]; /* Histogram for third pass */
+ uint32* h3= &mHistogram[768]; /* Histogram for last pass (MSB) */
+
+ bool AlreadySorted = true; /* Optimism... */
+
+ if (!AreRanksValid())
+ {
+ /* Prepare for temporal coherence */
+ const float* Running = input2;
+ float PrevVal = *Running;
+
+ while(p!=pe)
+ {
+ /* Read the input buffer sequentially */
+ float Val = *Running++;
+ /* Check whether already sorted or not */
+ if(Val<PrevVal) { AlreadySorted = false; break; } /* Early out */
+ /* Update for next iteration */
+ PrevVal = Val;
+
+ /* Create histograms */
+ h0[*p++]++; h1[*p++]++; h2[*p++]++; h3[*p++]++;
+ }
+
+ /* If all input values are already sorted, we just have to return and leave the */
+ /* previous list unchanged. That way the routine may take advantage of temporal */
+ /* coherence, for example when used to sort transparent faces. */
+ if(AlreadySorted)
+ {
+ uint32* const Ranks1 = GetRanks1();
+ for(uint32 i=0;i<nb;i++) Ranks1[i] = i;
+ return Ranks1;
+ }
+ }
+ else
+ {
+ /* Prepare for temporal coherence */
+ uint32* const Ranks1 = GetRanks1();
+
+ uint32* Indices = Ranks1;
+ float PrevVal = input2[*Indices];
+
+ while(p!=pe)
+ {
+ /* Read the input buffer in the previous sorted order */
+ float Val = input2[*Indices++];
+ /* Check whether already sorted or not */
+ if(Val<PrevVal) { AlreadySorted = false; break; } /* Early out */
+ /* Update for next iteration */
+ PrevVal = Val;
+
+ /* Create histograms */
+ h0[*p++]++; h1[*p++]++; h2[*p++]++; h3[*p++]++;
+ }
+
+ /* If all input values are already sorted, we just have to return and leave the */
+ /* previous list unchanged. That way the routine may take advantage of temporal */
+ /* coherence, for example when used to sort transparent faces. */
+ if(AlreadySorted) { return Ranks1; }
+ }
+
+ /* Else there has been an early out and we must finish computing the histograms */
+ while(p!=pe)
+ {
+ /* Create histograms without the previous overhead */
+ h0[*p++]++; h1[*p++]++; h2[*p++]++; h3[*p++]++;
+ }
+ }
+
+ // Compute #negative values involved if needed
+ uint32 NbNegativeValues = 0;
+
+ // An efficient way to compute the number of negative values we'll have to deal with is simply to sum the 128
+ // last values of the last histogram. Last histogram because that's the one for the Most Significant Byte,
+ // responsible for the sign. 128 last values because the 128 first ones are related to positive numbers.
+ uint32* h3= &mHistogram[768];
+ for(uint32 i=128;i<256;i++) NbNegativeValues += h3[i]; // 768 for last histogram, 128 for negative part
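+ // e.g. for input { -2.0f, 1.0f, -0.5f } the MSBs are 0xC0, 0x3F and 0xBF:
+ // two of them are >= 128, so NbNegativeValues == 2.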
+
+ // Radix sort, j is the pass number (0=LSB, 3=MSB)
+ for(uint32 j=0;j<4;j++)
+ {
+ // Should we care about negative values?
+ if(j!=3)
+ {
+ // Here we deal with positive values only
+ CHECK_PASS_VALIDITY(j);
+
+ if(PerformPass)
+ {
+ uint32* const Ranks2 = GetRanks2();
+ // Create offsets
+ mLink[0] = Ranks2;
+ for(uint32 i=1;i<256;i++) mLink[i] = mLink[i-1] + CurCount[i-1];
+
+ // Perform Radix Sort
+ const uint8* InputBytes = _type_cast_union(input).asUInts8;
+ InputBytes += j;
+ if (!AreRanksValid())
+ {
+ for(uint32 i=0;i<nb;i++)
+ {
+ *mLink[InputBytes[i<<2]]++ = i;
+ }
+
+ ValidateRanks();
+ }
+ else
+ {
+ uint32* const Ranks1 = GetRanks1();
+
+ uint32* Indices = Ranks1;
+ uint32* const IndicesEnd = Ranks1 + nb;
+ while(Indices!=IndicesEnd)
+ {
+ uint32 id = *Indices++;
+ *mLink[InputBytes[id<<2]]++ = id;
+ }
+ }
+
+ // Swap pointers for next pass. Valid indices - the most recent ones - are in mRanks after the swap.
+ SwapRanks();
+ }
+ }
+ else
+ {
+ // This is a special case to correctly handle negative values
+ CHECK_PASS_VALIDITY(j);
+
+ if(PerformPass)
+ {
+ uint32* const Ranks2 = GetRanks2();
+
+ // Create biased offsets, in order for negative numbers to be sorted as well
+ mLink[0] = Ranks2 + NbNegativeValues; // First positive number takes place after the negative ones
+ for(uint32 i=1;i<128;i++) mLink[i] = mLink[i-1] + CurCount[i-1]; // 1 to 128 for positive numbers
+
+ // We must reverse the sorting order for negative numbers!
+ mLink[255] = Ranks2;
+ for(uint32 i=0;i<127;i++) mLink[254-i] = mLink[255-i] + CurCount[255-i]; // Fixing the wrong order for negative values
+ for(uint32 i=128;i<256;i++) mLink[i] += CurCount[i]; // Fixing the wrong place for negative values
+
+ // Perform Radix Sort
+ if (!AreRanksValid())
+ {
+ for(uint32 i=0;i<nb;i++)
+ {
+ uint32 Radix = input[i]>>24; // Radix byte, same as above. AND is useless here (uint32).
+ // ### cmp to be killed. Not good. Later.
+ if(Radix<128) *mLink[Radix]++ = i; // Number is positive, same as above
+ else *(--mLink[Radix]) = i; // Number is negative, flip the sorting order
+ }
+
+ ValidateRanks();
+ }
+ else
+ {
+ uint32* const Ranks1 = GetRanks1();
+
+ for(uint32 i=0;i<nb;i++)
+ {
+ uint32 Radix = input[Ranks1[i]]>>24; // Radix byte, same as above. AND is useless here (uint32).
+ // ### cmp to be killed. Not good. Later.
+ if(Radix<128) *mLink[Radix]++ = Ranks1[i]; // Number is positive, same as above
+ else *(--mLink[Radix]) = Ranks1[i]; // Number is negative, flip the sorting order
+ }
+ }
+ // Swap pointers for next pass. Valid indices - the most recent ones - are in mRanks after the swap.
+ SwapRanks();
+ }
+ else
+ {
+ // The pass is useless, yet we still have to reverse the order of current list if all values are negative.
+ if(UniqueVal>=128)
+ {
+ if (!AreRanksValid())
+ {
+ uint32* const Ranks2 = GetRanks2();
+ // ###Possible?
+ for(uint32 i=0;i<nb;i++)
+ {
+ Ranks2[i] = nb-i-1;
+ }
+
+ ValidateRanks();
+ }
+ else
+ {
+ uint32* const Ranks1 = GetRanks1();
+ uint32* const Ranks2 = GetRanks2();
+ for(uint32 i=0;i<nb;i++) Ranks2[i] = Ranks1[nb-i-1];
+ }
+
+ // Swap pointers for next pass. Valid indices - the most recent ones - are in mRanks after the swap.
+ SwapRanks();
+ }
+ }
+ }
+ }
+
+ // Return indices
+ uint32* const Ranks1 = GetRanks1();
+ return Ranks1;
+}
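+
+// Minimal usage sketch (hypothetical values; little-endian IEEE-754 assumed,
+// as everywhere else in this sorter):
+//
+//   RadixSortContext ctx;
+//   float vals[] = { 3.0f, -1.0f, 2.0f };
+//   const uint32* ranks = ctx.RadixSort( vals, 3 );  // ranks == { 1, 2, 0 }
+//   // A second call on unchanged data early-outs: the previous ranks are
+//   // read back in sorted order, found monotonic, and returned as-is.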
+