commit     c5fc66ee58f2c60f2d226868bb1cf5b91badaf53 (patch)
author     sanine <sanine.not@pm.me>  2022-10-01 20:59:36 -0500
committer  sanine <sanine.not@pm.me>  2022-10-01 20:59:36 -0500
tree       277dd280daf10bf77013236b8edfa5f88708c7e0 /libs/ode-0.16.1/ode/src/collision_sapspace.cpp
parent     1cf9cc3408af7008451f9133fb95af66a9697d15 (diff)
add ode
Diffstat (limited to 'libs/ode-0.16.1/ode/src/collision_sapspace.cpp')

-rw-r--r--  libs/ode-0.16.1/ode/src/collision_sapspace.cpp  853
1 file changed, 853 insertions, 0 deletions
diff --git a/libs/ode-0.16.1/ode/src/collision_sapspace.cpp b/libs/ode-0.16.1/ode/src/collision_sapspace.cpp
new file mode 100644
index 0000000..76258bf
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/collision_sapspace.cpp
@@ -0,0 +1,853 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001-2003 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+/*
+ * Sweep and Prune adaptation/tweaks for ODE by Aras Pranckevicius.
+ * Additional work by David Walters
+ * Original code:
+ *     OPCODE - Optimized Collision Detection
+ *     Copyright (C) 2001 Pierre Terdiman
+ *     Homepage: http://www.codercorner.com/Opcode.htm
+ *
+ * This version does complete radix sort, not "classical" SAP. So, we
+ * have no temporal coherence, but are able to handle any movement
+ * velocities equally well.
+ */
+
+#include <ode/common.h>
+#include <ode/collision_space.h>
+#include <ode/collision.h>
+
+#include "config.h"
+#include "matrix.h"
+#include "collision_kernel.h"
+#include "collision_space_internal.h"
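
A minimal usage sketch of the space this file implements, assuming the public ODE headers and library are available (it is not part of the commit). dSweepAndPruneSpaceCreate is the real entry point defined below; dSAP_AXES_XZY is the X,Z,Y axis ordering that the implementation notes below suggest is often best. The callback name and geometry values are illustrative only.

#include <ode/ode.h>
#include <cstdio>

static void nearCb( void *data, dGeomID g1, dGeomID g2 )
{
    (void)data;
    std::printf( "candidate pair: %p %p\n", (void*)g1, (void*)g2 );
}

int main()
{
    dInitODE();
    dSpaceID sap = dSweepAndPruneSpaceCreate( 0, dSAP_AXES_XZY ); // X,Z,Y sort order
    dGeomID a = dCreateSphere( sap, 1.0 );
    dGeomID b = dCreateSphere( sap, 1.0 );
    dGeomSetPosition( a, 0.0, 0.0, 0.0 );
    dGeomSetPosition( b, 0.5, 0.0, 0.0 );  // overlaps sphere 'a'
    dSpaceCollide( sap, 0, &nearCb );      // sweep-and-prune broadphase pass
    dSpaceDestroy( sap );
    dCloseODE();
    return 0;
}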
+
+// Reference counting helper for radix sort global data.
+//static void RadixSortRef();
+//static void RadixSortDeref();
+
+
+// --------------------------------------------------------------------------
+//  Radix Sort Context
+// --------------------------------------------------------------------------
+
+struct RaixSortContext
+{
+public:
+    RaixSortContext(): mCurrentSize(0), mCurrentUtilization(0), mRanksValid(false), mRanksBuffer(NULL), mPrimaryRanks(NULL) {}
+    ~RaixSortContext() { FreeRanks(); }
+
+    // OPCODE's Radix Sorting, returns a list of indices in sorted order
+    const uint32* RadixSort( const float* input2, uint32 nb );
+
+private:
+    void FreeRanks();
+    void AllocateRanks(sizeint nNewSize);
+
+    void ReallocateRanksIfNecessary(sizeint nNewSize);
+
+private:
+    void SetCurrentSize(sizeint nValue) { mCurrentSize = nValue; }
+    sizeint GetCurrentSize() const { return mCurrentSize; }
+
+    void SetCurrentUtilization(sizeint nValue) { mCurrentUtilization = nValue; }
+    sizeint GetCurrentUtilization() const { return mCurrentUtilization; }
+
+    uint32 *GetRanks1() const { return mPrimaryRanks; }
+    uint32 *GetRanks2() const { return mRanksBuffer + ((mRanksBuffer + mCurrentSize) - mPrimaryRanks); }
+    void SwapRanks() { mPrimaryRanks = GetRanks2(); }
+
+    bool AreRanksValid() const { return mRanksValid; }
+    void InvalidateRanks() { mRanksValid = false; }
+    void ValidateRanks() { mRanksValid = true; }
+
+private:
+    sizeint mCurrentSize;        //!< Current size of the indices list
+    sizeint mCurrentUtilization; //!< Current utilization of the indices list
+    bool mRanksValid;
+    uint32* mRanksBuffer;        //!< Two lists allocated sequentially in a single block
+    uint32* mPrimaryRanks;
+};
+
+void RaixSortContext::AllocateRanks(sizeint nNewSize)
+{
+    dIASSERT(GetCurrentSize() == 0);
+
+    mRanksBuffer = new uint32[2 * nNewSize];
+    mPrimaryRanks = mRanksBuffer;
+
+    SetCurrentSize(nNewSize);
+}
+
+void RaixSortContext::FreeRanks()
+{
+    SetCurrentSize(0);
+
+    delete[] mRanksBuffer;
+}
+
+void RaixSortContext::ReallocateRanksIfNecessary(sizeint nNewSize)
+{
+    sizeint nCurUtilization = GetCurrentUtilization();
+
+    if (nNewSize != nCurUtilization)
+    {
+        sizeint nCurSize = GetCurrentSize();
+
+        if ( nNewSize > nCurSize )
+        {
+            // Free previously used ram
+            FreeRanks();
+
+            // Get some fresh one
+            AllocateRanks(nNewSize);
+        }
+
+        InvalidateRanks();
+        SetCurrentUtilization(nNewSize);
+    }
+}
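
The GetRanks2() arithmetic above is easy to misread, so here is a small standalone sketch of it (simplified types, not the diff's code): both rank lists live in one 2*n allocation, and the secondary pointer is computed as whichever half the primary pointer does not currently occupy, so SwapRanks() ping-pongs between the halves.

#include <cassert>
#include <cstdint>

int main()
{
    const std::size_t n = 4;
    std::uint32_t buffer[2 * n];        // two lists in a single block
    std::uint32_t *primary = buffer;    // plays the role of mPrimaryRanks

    // GetRanks2(): buffer + ((buffer + n) - primary) selects the other half
    std::uint32_t *secondary = buffer + ((buffer + n) - primary);
    assert( secondary == buffer + n );

    primary = secondary;                // SwapRanks()
    secondary = buffer + ((buffer + n) - primary);
    assert( secondary == buffer );      // halves have exchanged roles
    return 0;
}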
+
+// --------------------------------------------------------------------------
+//  SAP space code
+// --------------------------------------------------------------------------
+
+struct dxSAPSpace : public dxSpace
+{
+    // Constructor / Destructor
+    dxSAPSpace( dSpaceID _space, int sortaxis );
+    ~dxSAPSpace();
+
+    // dxSpace
+    virtual dxGeom* getGeom(int i);
+    virtual void add(dxGeom* g);
+    virtual void remove(dxGeom* g);
+    virtual void dirty(dxGeom* g);
+    virtual void computeAABB();
+    virtual void cleanGeoms();
+    virtual void collide( void *data, dNearCallback *callback );
+    virtual void collide2( void *data, dxGeom *geom, dNearCallback *callback );
+
+private:
+
+    //--------------------------------------------------------------------------
+    // Local Declarations
+    //--------------------------------------------------------------------------
+
+    //! A generic couple structure
+    struct Pair
+    {
+        uint32 id0; //!< First index of the pair
+        uint32 id1; //!< Second index of the pair
+
+        // Default and Value Constructor
+        Pair() {}
+        Pair( uint32 i0, uint32 i1 ) : id0( i0 ), id1( i1 ) {}
+    };
+
+    //--------------------------------------------------------------------------
+    // Helpers
+    //--------------------------------------------------------------------------
+
+    /**
+     * Complete box pruning.
+     * Returns a list of overlapping pairs of boxes, each box of the pair
+     * belongs to the same set.
+     *
+     * @param count [in] number of boxes.
+     * @param geoms [in] geoms of boxes.
+     * @param pairs [out] array of overlapping pairs.
+     */
+    void BoxPruning( int count, const dxGeom** geoms, dArray< Pair >& pairs );
+
+
+    //--------------------------------------------------------------------------
+    // Implementation Data
+    //--------------------------------------------------------------------------
+
+    // We have two lists (arrays of pointers) to dirty and clean
+    // geoms. Each geom knows its index into the corresponding list
+    // (see macros above).
+    dArray<dxGeom*> DirtyList; // dirty geoms
+    dArray<dxGeom*> GeomList;  // clean geoms
+
+    // For SAP, we ultimately separate "normal" geoms and the ones that have
+    // infinite AABBs. No point doing SAP on infinite ones (and it doesn't handle
+    // infinite geoms anyway).
+    dArray<dxGeom*> TmpGeomList;    // temporary for normal geoms
+    dArray<dxGeom*> TmpInfGeomList; // temporary for geoms with infinite AABBs
+
+    // Our sorting axes. (X,Z,Y is often best). Stored *2 for minor speedup
+    // Axis indices into geom's aabb are: min=idx, max=idx+1
+    uint32 ax0idx;
+    uint32 ax1idx;
+    uint32 ax2idx;
+
+    // pruning position array scratch pad
+    // NOTE: this is float not dReal because of the OPCODE radix sorter
+    dArray< float > poslist;
+    RaixSortContext sortContext;
+};
+
+// Creation
+dSpaceID dSweepAndPruneSpaceCreate( dxSpace* space, int axisorder ) {
+    return new dxSAPSpace( space, axisorder );
+}
+
+
+//==============================================================================
+
+#define GEOM_ENABLED(g) (((g)->gflags & GEOM_ENABLE_TEST_MASK) == GEOM_ENABLE_TEST_VALUE)
+
+// HACK: We abuse 'next' and 'tome' members of dxGeom to store indices into dirty/geom lists.
+#define GEOM_SET_DIRTY_IDX(g,idx) { (g)->next_ex = (dxGeom*)(sizeint)(idx); }
+#define GEOM_SET_GEOM_IDX(g,idx) { (g)->tome_ex = (dxGeom**)(sizeint)(idx); }
+#define GEOM_GET_DIRTY_IDX(g) ((int)(sizeint)(g)->next_ex)
+#define GEOM_GET_GEOM_IDX(g) ((int)(sizeint)(g)->tome_ex)
+#define GEOM_INVALID_IDX (-1)
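
The GEOM_SET/GET macros above stash an integer index in a pointer-typed member. A hedged round-trip sketch of that trick follows; FakeGeom and std::intptr_t are stand-ins for dxGeom and ODE's sizeint, which are not reproduced here.

#include <cassert>
#include <cstdint>

struct FakeGeom { FakeGeom *next_ex; }; // stand-in for dxGeom's abused 'next' member

int main()
{
    FakeGeom g;
    int idx = 42;
    g.next_ex = (FakeGeom*)(std::intptr_t)idx;     // like GEOM_SET_DIRTY_IDX
    assert( (int)(std::intptr_t)g.next_ex == 42 ); // like GEOM_GET_DIRTY_IDX
    return 0;
}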
+
+
+/*
+ * A bit of repetitive work - similar to collideAABBs, but doesn't check
+ * if AABBs intersect (because SAP returns pairs with overlapping AABBs).
+ */
+static void collideGeomsNoAABBs( dxGeom *g1, dxGeom *g2, void *data, dNearCallback *callback )
+{
+    dIASSERT( (g1->gflags & GEOM_AABB_BAD)==0 );
+    dIASSERT( (g2->gflags & GEOM_AABB_BAD)==0 );
+
+    // no contacts if both geoms on the same body, and the body is not 0
+    if (g1->body == g2->body && g1->body) return;
+
+    // test if the category and collide bitfields match
+    if ( ((g1->category_bits & g2->collide_bits) ||
+          (g2->category_bits & g1->collide_bits)) == 0) {
+        return;
+    }
+
+    dReal *bounds1 = g1->aabb;
+    dReal *bounds2 = g2->aabb;
+
+    // check if either object is able to prove that it doesn't intersect the
+    // AABB of the other
+    if (g1->AABBTest (g2,bounds2) == 0) return;
+    if (g2->AABBTest (g1,bounds1) == 0) return;
+
+    // the objects might actually intersect - call the space callback function
+    callback (data,g1,g2);
+}
+
+
+dxSAPSpace::dxSAPSpace( dSpaceID _space, int axisorder ) : dxSpace( _space )
+{
+    type = dSweepAndPruneSpaceClass;
+
+    // Init AABB to infinity
+    aabb[0] = -dInfinity;
+    aabb[1] = dInfinity;
+    aabb[2] = -dInfinity;
+    aabb[3] = dInfinity;
+    aabb[4] = -dInfinity;
+    aabb[5] = dInfinity;
+
+    ax0idx = ( ( axisorder ) & 3 ) << 1;
+    ax1idx = ( ( axisorder >> 2 ) & 3 ) << 1;
+    ax2idx = ( ( axisorder >> 4 ) & 3 ) << 1;
+}
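
A worked example of the constructor's bit decoding, using the dSAP_AXES_XZY value from ODE's public header (0 | (2<<2) | (1<<4)): each axis id occupies two bits, and the final <<1 converts an axis id into an offset into the min/max-interleaved aabb[] array, so the minimum sits at idx and the maximum at idx+1.

#include <cassert>

int main()
{
    int axisorder = 0 | ( 2 << 2 ) | ( 1 << 4 );       // X, Z, Y
    unsigned ax0idx = ( ( axisorder      ) & 3 ) << 1; // 0 -> aabb[0]/aabb[1], X extents
    unsigned ax1idx = ( ( axisorder >> 2 ) & 3 ) << 1; // 4 -> aabb[4]/aabb[5], Z extents
    unsigned ax2idx = ( ( axisorder >> 4 ) & 3 ) << 1; // 2 -> aabb[2]/aabb[3], Y extents
    assert( ax0idx == 0 && ax1idx == 4 && ax2idx == 2 );
    return 0;
}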
+
+dxSAPSpace::~dxSAPSpace()
+{
+    CHECK_NOT_LOCKED(this);
+    if ( cleanup ) {
+        // note that destroying each geom will call remove()
+        for ( ; DirtyList.size(); dGeomDestroy( DirtyList[ 0 ] ) ) {}
+        for ( ; GeomList.size(); dGeomDestroy( GeomList[ 0 ] ) ) {}
+    }
+    else {
+        // just unhook them
+        for ( ; DirtyList.size(); remove( DirtyList[ 0 ] ) ) {}
+        for ( ; GeomList.size(); remove( GeomList[ 0 ] ) ) {}
+    }
+}
+
+dxGeom* dxSAPSpace::getGeom( int i )
+{
+    dUASSERT( i >= 0 && i < count, "index out of range" );
+    int dirtySize = DirtyList.size();
+    if( i < dirtySize )
+        return DirtyList[i];
+    else
+        return GeomList[i-dirtySize];
+}
+
+void dxSAPSpace::add( dxGeom* g )
+{
+    CHECK_NOT_LOCKED (this);
+    dAASSERT(g);
+    dUASSERT(g->tome_ex == 0 && g->next_ex == 0, "geom is already in a space");
+
+    // add to dirty list
+    GEOM_SET_DIRTY_IDX( g, DirtyList.size() );
+    GEOM_SET_GEOM_IDX( g, GEOM_INVALID_IDX );
+    DirtyList.push( g );
+
+    dxSpace::add(g);
+}
+
+void dxSAPSpace::remove( dxGeom* g )
+{
+    CHECK_NOT_LOCKED(this);
+    dAASSERT(g);
+    dUASSERT(g->parent_space == this,"object is not in this space");
+
+    // remove
+    int dirtyIdx = GEOM_GET_DIRTY_IDX(g);
+    int geomIdx = GEOM_GET_GEOM_IDX(g);
+    // must be in one list, not in both
+    dUASSERT(
+        (dirtyIdx==GEOM_INVALID_IDX && geomIdx>=0 && geomIdx<GeomList.size()) ||
+        (geomIdx==GEOM_INVALID_IDX && dirtyIdx>=0 && dirtyIdx<DirtyList.size()),
+        "geom indices messed up" );
+    if( dirtyIdx != GEOM_INVALID_IDX ) {
+        // we're in dirty list, remove
+        int dirtySize = DirtyList.size();
+        if (dirtyIdx != dirtySize-1) {
+            dxGeom* lastG = DirtyList[dirtySize-1];
+            DirtyList[dirtyIdx] = lastG;
+            GEOM_SET_DIRTY_IDX(lastG,dirtyIdx);
+        }
+        GEOM_SET_DIRTY_IDX(g,GEOM_INVALID_IDX);
+        DirtyList.setSize( dirtySize-1 );
+    } else {
+        // we're in geom list, remove
+        int geomSize = GeomList.size();
+        if (geomIdx != geomSize-1) {
+            dxGeom* lastG = GeomList[geomSize-1];
+            GeomList[geomIdx] = lastG;
+            GEOM_SET_GEOM_IDX(lastG,geomIdx);
+        }
+        GEOM_SET_GEOM_IDX(g,GEOM_INVALID_IDX);
+        GeomList.setSize( geomSize-1 );
+    }
+
+    dxSpace::remove(g);
+}
+
+void dxSAPSpace::dirty( dxGeom* g )
+{
+    dAASSERT(g);
+    dUASSERT(g->parent_space == this, "object is not in this space");
+
+    // check if already dirtied
+    int dirtyIdx = GEOM_GET_DIRTY_IDX(g);
+    if( dirtyIdx != GEOM_INVALID_IDX )
+        return;
+
+    int geomIdx = GEOM_GET_GEOM_IDX(g);
+    dUASSERT( geomIdx>=0 && geomIdx<GeomList.size(), "geom indices messed up" );
+
+    // remove from geom list, place last in place of this
+    int geomSize = GeomList.size();
+    if (geomIdx != geomSize-1) {
+        dxGeom* lastG = GeomList[geomSize-1];
+        GeomList[geomIdx] = lastG;
+        GEOM_SET_GEOM_IDX(lastG,geomIdx);
+    }
+    GeomList.setSize( geomSize-1 );
+
+    // add to dirty list
+    GEOM_SET_GEOM_IDX( g, GEOM_INVALID_IDX );
+    GEOM_SET_DIRTY_IDX( g, DirtyList.size() );
+    DirtyList.push( g );
+}
+
+void dxSAPSpace::computeAABB()
+{
+    // TODO?
+}
+
+void dxSAPSpace::cleanGeoms()
+{
+    int dirtySize = DirtyList.size();
+    if( !dirtySize )
+        return;
+
+    // compute the AABBs of all dirty geoms, clear the dirty flags,
+    // remove from dirty list, place into geom list
+    lock_count++;
+
+    int geomSize = GeomList.size();
+    GeomList.setSize( geomSize + dirtySize ); // ensure space in geom list
+
+    for( int i = 0; i < dirtySize; ++i ) {
+        dxGeom* g = DirtyList[i];
+        if( IS_SPACE(g) ) {
+            ((dxSpace*)g)->cleanGeoms();
+        }
+
+        g->recomputeAABB();
+        dIASSERT((g->gflags & GEOM_AABB_BAD) == 0);
+
+        g->gflags &= ~GEOM_DIRTY;
+
+        // remove from dirty list, add to geom list
+        GEOM_SET_DIRTY_IDX( g, GEOM_INVALID_IDX );
+        GEOM_SET_GEOM_IDX( g, geomSize + i );
+        GeomList[geomSize+i] = g;
+    }
+    // clear dirty list
+    DirtyList.setSize( 0 );
+
+    lock_count--;
+}
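
remove() and dirty() above both use the same O(1) unordered-removal idiom. A minimal sketch follows, with std::vector standing in for dArray (a simplification, not the diff's code): move the last element into the vacated slot, patch its stored index, and shrink.

#include <cassert>
#include <vector>

int main()
{
    std::vector<int> list = { 10, 20, 30, 40 };
    int removeIdx = 1;                   // remove the element holding 20
    int lastIdx = (int)list.size() - 1;
    if ( removeIdx != lastIdx )
        list[removeIdx] = list[lastIdx]; // last element fills the hole;
                                         // a real geom's stored index would be patched here
    list.pop_back();                     // shrink by one; order is not preserved
    assert( list.size() == 3 && list[1] == 40 );
    return 0;
}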
+
+void dxSAPSpace::collide( void *data, dNearCallback *callback )
+{
+    dAASSERT (callback);
+
+    lock_count++;
+
+    cleanGeoms();
+
+    // by now all geoms are in GeomList, and DirtyList must be empty
+    int geom_count = GeomList.size();
+    dUASSERT( geom_count == count, "geom counts messed up" );
+
+    // separate all ENABLED geoms into infinite AABBs and normal AABBs
+    TmpGeomList.setSize(0);
+    TmpInfGeomList.setSize(0);
+    int axis0max = ax0idx + 1;
+    for( int i = 0; i < geom_count; ++i ) {
+        dxGeom* g = GeomList[i];
+        if( !GEOM_ENABLED(g) ) // skip disabled ones
+            continue;
+        const dReal& amax = g->aabb[axis0max];
+        if( amax == dInfinity ) // HACK? probably not...
+            TmpInfGeomList.push( g );
+        else
+            TmpGeomList.push( g );
+    }
+
+    // do SAP on normal AABBs
+    dArray< Pair > overlapBoxes;
+    int tmp_geom_count = TmpGeomList.size();
+    if ( tmp_geom_count > 0 )
+    {
+        // Generate a list of overlapping boxes
+        BoxPruning( tmp_geom_count, (const dxGeom**)TmpGeomList.data(), overlapBoxes );
+    }
+
+    // collide overlapping
+    int overlapCount = overlapBoxes.size();
+    for( int j = 0; j < overlapCount; ++j )
+    {
+        const Pair& pair = overlapBoxes[ j ];
+        dxGeom* g1 = TmpGeomList[ pair.id0 ];
+        dxGeom* g2 = TmpGeomList[ pair.id1 ];
+        collideGeomsNoAABBs( g1, g2, data, callback );
+    }
+
+    int infSize = TmpInfGeomList.size();
+    int normSize = TmpGeomList.size();
+    int m, n;
+
+    for ( m = 0; m < infSize; ++m )
+    {
+        dxGeom* g1 = TmpInfGeomList[ m ];
+
+        // collide infinite ones
+        for( n = m+1; n < infSize; ++n ) {
+            dxGeom* g2 = TmpInfGeomList[n];
+            collideGeomsNoAABBs( g1, g2, data, callback );
+        }
+
+        // collide infinite ones with normal ones
+        for( n = 0; n < normSize; ++n ) {
+            dxGeom* g2 = TmpGeomList[n];
+            collideGeomsNoAABBs( g1, g2, data, callback );
+        }
+    }
+
+    lock_count--;
+}
+
+void dxSAPSpace::collide2( void *data, dxGeom *geom, dNearCallback *callback )
+{
+    dAASSERT (geom && callback);
+
+    // TODO: This is just a simple N^2 implementation
+
+    lock_count++;
+
+    cleanGeoms();
+    geom->recomputeAABB();
+
+    // intersect bounding boxes
+    int geom_count = GeomList.size();
+    for ( int i = 0; i < geom_count; ++i ) {
+        dxGeom* g = GeomList[i];
+        if ( GEOM_ENABLED(g) )
+            collideAABBs (g,geom,data,callback);
+    }
+
+    lock_count--;
+}
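
collide() above splits the work: SAP handles the finite-AABB geoms, while geoms with infinite AABBs are paired by brute force against each other and against every finite geom. A toy sketch of that brute-force loop structure (stand-in values, assumed): with nInf infinite and nNorm finite geoms it tests nInf*(nInf-1)/2 + nInf*nNorm pairs.

#include <cstdio>
#include <vector>

int main()
{
    std::vector<int> inf  = { 0, 1 };    // stand-ins for infinite-AABB geoms
    std::vector<int> norm = { 2, 3, 4 }; // stand-ins for finite-AABB geoms
    int pairsTested = 0;
    for ( std::size_t m = 0; m < inf.size(); ++m ) {
        for ( std::size_t n = m + 1; n < inf.size(); ++n )  ++pairsTested; // inf vs inf
        for ( std::size_t n = 0; n < norm.size(); ++n )     ++pairsTested; // inf vs finite
    }
    std::printf( "brute-force pairs: %d\n", pairsTested ); // 1 + 2*3 = 7
    return 0;
}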
+
+
+void dxSAPSpace::BoxPruning( int count, const dxGeom** geoms, dArray< Pair >& pairs )
+{
+    // Size the poslist (+1 for infinity end cap)
+    poslist.setSize( count );
+
+    // 1) Build main list using the primary axis
+    //  NOTE: uses floats instead of dReals because that's what radix sort wants
+    for( int i = 0; i < count; ++i )
+        poslist[ i ] = (float)TmpGeomList[i]->aabb[ ax0idx ];
+
+    // 2) Sort the list
+    const uint32* Sorted = sortContext.RadixSort( poslist.data(), count );
+
+    // 3) Prune the list
+    const uint32* const LastSorted = Sorted + count;
+    const uint32* RunningAddress = Sorted;
+
+    bool bExitLoop;
+    Pair IndexPair;
+    while ( Sorted < LastSorted )
+    {
+        IndexPair.id0 = *Sorted++;
+
+        // empty, this loop just advances RunningAddress
+        for (bExitLoop = false; poslist[*RunningAddress++] < poslist[IndexPair.id0]; )
+        {
+            if (RunningAddress == LastSorted)
+            {
+                bExitLoop = true;
+                break;
+            }
+        }
+
+        if ( bExitLoop || RunningAddress == LastSorted) // Not a bug!!!
+        {
+            break;
+        }
+
+        const float idx0ax0max = (float)geoms[IndexPair.id0]->aabb[ax0idx+1]; // To avoid wrong decisions caused by rounding errors, cast the AABB element to float similarly as we did at the function beginning
+        const dReal idx0ax1max = geoms[IndexPair.id0]->aabb[ax1idx+1];
+        const dReal idx0ax2max = geoms[IndexPair.id0]->aabb[ax2idx+1];
+
+        for (const uint32* RunningAddress2 = RunningAddress; poslist[ IndexPair.id1 = *RunningAddress2++ ] <= idx0ax0max; )
+        {
+            const dReal* aabb0 = geoms[ IndexPair.id0 ]->aabb;
+            const dReal* aabb1 = geoms[ IndexPair.id1 ]->aabb;
+
+            // Intersection?
+            if ( idx0ax1max >= aabb1[ax1idx] && aabb1[ax1idx+1] >= aabb0[ax1idx]
+                && idx0ax2max >= aabb1[ax2idx] && aabb1[ax2idx+1] >= aabb0[ax2idx] )
+            {
+                pairs.push( IndexPair );
+            }
+
+            if (RunningAddress2 == LastSorted)
+            {
+                break;
+            }
+        }
+
+    } // while ( RunningAddress < LastSorted && Sorted < LastSorted )
+}
+
+
+//==============================================================================
+
+//------------------------------------------------------------------------------
+// Radix Sort
+//------------------------------------------------------------------------------
+
+
+
+#define CHECK_PASS_VALIDITY(pass) \
+    /* Shortcut to current counters */ \
+    const uint32* CurCount = &mHistogram[pass<<8]; \
+    \
+    /* Reset flag. The sorting pass is supposed to be performed. (default) */ \
+    bool PerformPass = true; \
+    \
+    /* Check pass validity */ \
+    \
+    /* If all values have the same byte, sorting is useless. */ \
+    /* It may happen when sorting bytes or words instead of dwords. */ \
+    /* This routine actually sorts words faster than dwords, and bytes */ \
+    /* faster than words. Standard running time (O(4*n)) is reduced to O(2*n) */ \
+    /* for words and O(n) for bytes. Running time for floats depends on actual values... */ \
+    \
+    /* Get first byte */ \
+    uint8 UniqueVal = *(((const uint8*)input)+pass); \
+    \
+    /* Check that byte's counter */ \
+    if(CurCount[UniqueVal]==nb) PerformPass=false;
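
A compact standalone sketch of the pruning idea in BoxPruning() above, using simplified types and std::sort in place of the radix sorter (so an assumption rather than the diff's code): sort boxes by their minimum on one axis, then for each box scan forward only while a candidate's minimum can still fall inside it on that axis, confirming overlap on the remaining two axes.

#include <algorithm>
#include <cstdio>
#include <vector>

struct Box { float lo[3], hi[3]; };

int main()
{
    std::vector<Box> boxes = {
        { { 0, 0, 0 }, { 2, 2, 2 } },
        { { 1, 1, 1 }, { 3, 3, 3 } },  // overlaps box 0
        { { 5, 0, 0 }, { 6, 1, 1 } },  // overlaps nothing
    };
    std::vector<int> order( boxes.size() );
    for ( int i = 0; i < (int)order.size(); ++i ) order[i] = i;
    std::sort( order.begin(), order.end(),
        [&]( int a, int b ) { return boxes[a].lo[0] < boxes[b].lo[0]; } );

    for ( std::size_t i = 0; i < order.size(); ++i ) {
        const Box& a = boxes[ order[i] ];
        for ( std::size_t j = i + 1; j < order.size(); ++j ) {
            const Box& b = boxes[ order[j] ];
            if ( b.lo[0] > a.hi[0] ) break; // sorted axis: nothing later can overlap
            if ( a.hi[1] >= b.lo[1] && b.hi[1] >= a.lo[1] &&
                 a.hi[2] >= b.lo[2] && b.hi[2] >= a.lo[2] )
                std::printf( "overlap: %d %d\n", order[i], order[j] ); // prints "overlap: 0 1"
        }
    }
    return 0;
}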
+
+// WARNING ONLY SORTS IEEE FLOATING-POINT VALUES
+const uint32* RaixSortContext::RadixSort( const float* input2, uint32 nb )
+{
+    union _type_cast_union
+    {
+        _type_cast_union(const float *floats): asFloats(floats) {}
+        _type_cast_union(const uint32 *uints32): asUInts32(uints32) {}
+
+        const float *asFloats;
+        const uint32 *asUInts32;
+        const uint8 *asUInts8;
+    };
+
+    const uint32* input = _type_cast_union(input2).asUInts32;
+
+    // Resize lists if needed
+    ReallocateRanksIfNecessary(nb);
+
+    // Allocate histograms & offsets on the stack
+    uint32 mHistogram[256*4];
+    uint32* mLink[256];
+
+    // Create histograms (counters). Counters for all passes are created in one run.
+    // Pros: read input buffer once instead of four times
+    // Cons: mHistogram is 4Kb instead of 1Kb
+    // Floating-point values are always supposed to be signed values, so there's only one code path there.
+    // Please note the floating point comparison needed for temporal coherence! Although the resulting asm code
+    // is dreadful, this is surprisingly not such a performance hit - well, I suppose that's a big one on first
+    // generation Pentiums....We can't make comparison on integer representations because, as Chris said, it just
+    // wouldn't work with mixed positive/negative values....
+    {
+        /* Clear counters/histograms */
+        memset(mHistogram, 0, 256*4*sizeof(uint32));
+
+        /* Prepare to count */
+        const uint8* p = _type_cast_union(input).asUInts8;
+        const uint8* pe = &p[nb*4];
+        uint32* h0= &mHistogram[0];   /* Histogram for first pass (LSB) */
+        uint32* h1= &mHistogram[256]; /* Histogram for second pass */
+        uint32* h2= &mHistogram[512]; /* Histogram for third pass */
+        uint32* h3= &mHistogram[768]; /* Histogram for last pass (MSB) */
+
+        bool AlreadySorted = true; /* Optimism... */
+
+        if (!AreRanksValid())
+        {
+            /* Prepare for temporal coherence */
+            const float* Running = input2;
+            float PrevVal = *Running;
+
+            while(p!=pe)
+            {
+                /* Read input2 in previous sorted order */
+                float Val = *Running++;
+                /* Check whether already sorted or not */
+                if(Val<PrevVal) { AlreadySorted = false; break; } /* Early out */
+                /* Update for next iteration */
+                PrevVal = Val;
+
+                /* Create histograms */
+                h0[*p++]++; h1[*p++]++; h2[*p++]++; h3[*p++]++;
+            }
+
+            /* If all input values are already sorted, we just have to return and leave the */
+            /* previous list unchanged. That way the routine may take advantage of temporal */
+            /* coherence, for example when used to sort transparent faces. */
+            if(AlreadySorted)
+            {
+                uint32* const Ranks1 = GetRanks1();
+                for(uint32 i=0;i<nb;i++) Ranks1[i] = i;
+                return Ranks1;
+            }
+        }
+        else
+        {
+            /* Prepare for temporal coherence */
+            uint32* const Ranks1 = GetRanks1();
+
+            uint32* Indices = Ranks1;
+            float PrevVal = input2[*Indices];
+
+            while(p!=pe)
+            {
+                /* Read input2 in previous sorted order */
+                float Val = input2[*Indices++];
+                /* Check whether already sorted or not */
+                if(Val<PrevVal) { AlreadySorted = false; break; } /* Early out */
+                /* Update for next iteration */
+                PrevVal = Val;
+
+                /* Create histograms */
+                h0[*p++]++; h1[*p++]++; h2[*p++]++; h3[*p++]++;
+            }
+
+            /* If all input values are already sorted, we just have to return and leave the */
+            /* previous list unchanged. That way the routine may take advantage of temporal */
+            /* coherence, for example when used to sort transparent faces. */
+            if(AlreadySorted) { return Ranks1; }
+        }
+
+        /* Else there has been an early out and we must finish computing the histograms */
+        while(p!=pe)
+        {
+            /* Create histograms without the previous overhead */
+            h0[*p++]++; h1[*p++]++; h2[*p++]++; h3[*p++]++;
+        }
+    }
+
+    // Compute #negative values involved if needed
+    uint32 NbNegativeValues = 0;
+
+    // An efficient way to compute the number of negative values we'll have to deal with is simply to sum the 128
+    // last values of the last histogram. Last histogram because that's the one for the Most Significant Byte,
+    // responsible for the sign. 128 last values because the 128 first ones are related to positive numbers.
+    uint32* h3= &mHistogram[768];
+    for(uint32 i=128;i<256;i++) NbNegativeValues += h3[i]; // 768 for last histogram, 128 for negative part
+
+    // Radix sort, j is the pass number (0=LSB, 3=MSB)
+    for(uint32 j=0;j<4;j++)
+    {
+        // Should we care about negative values?
+        if(j!=3)
+        {
+            // Here we deal with positive values only
+            CHECK_PASS_VALIDITY(j);
+
+            if(PerformPass)
+            {
+                uint32* const Ranks2 = GetRanks2();
+                // Create offsets
+                mLink[0] = Ranks2;
+                for(uint32 i=1;i<256;i++) mLink[i] = mLink[i-1] + CurCount[i-1];
+
+                // Perform Radix Sort
+                const uint8* InputBytes = _type_cast_union(input).asUInts8;
+                InputBytes += j;
+                if (!AreRanksValid())
+                {
+                    for(uint32 i=0;i<nb;i++)
+                    {
+                        *mLink[InputBytes[i<<2]]++ = i;
+                    }
+
+                    ValidateRanks();
+                }
+                else
+                {
+                    uint32* const Ranks1 = GetRanks1();
+
+                    uint32* Indices = Ranks1;
+                    uint32* const IndicesEnd = Ranks1 + nb;
+                    while(Indices!=IndicesEnd)
+                    {
+                        uint32 id = *Indices++;
+                        *mLink[InputBytes[id<<2]]++ = id;
+                    }
+                }
+
+                // Swap pointers for next pass. Valid indices - the most recent ones - are in mRanks after the swap.
+                SwapRanks();
+            }
+        }
+        else
+        {
+            // This is a special case to correctly handle negative values
+            CHECK_PASS_VALIDITY(j);
+
+            if(PerformPass)
+            {
+                uint32* const Ranks2 = GetRanks2();
+
+                // Create biased offsets, in order for negative numbers to be sorted as well
+                mLink[0] = Ranks2 + NbNegativeValues; // First positive number takes place after the negative ones
+                for(uint32 i=1;i<128;i++) mLink[i] = mLink[i-1] + CurCount[i-1]; // 1 to 128 for positive numbers
+
+                // We must reverse the sorting order for negative numbers!
+                mLink[255] = Ranks2;
+                for(uint32 i=0;i<127;i++) mLink[254-i] = mLink[255-i] + CurCount[255-i]; // Fixing the wrong order for negative values
+                for(uint32 i=128;i<256;i++) mLink[i] += CurCount[i]; // Fixing the wrong place for negative values
+
+                // Perform Radix Sort
+                if (!AreRanksValid())
+                {
+                    for(uint32 i=0;i<nb;i++)
+                    {
+                        uint32 Radix = input[i]>>24; // Radix byte, same as above. AND is useless here (uint32).
+                        // ### cmp to be killed. Not good. Later.
+                        if(Radix<128) *mLink[Radix]++ = i; // Number is positive, same as above
+                        else *(--mLink[Radix]) = i; // Number is negative, flip the sorting order
+                    }
+
+                    ValidateRanks();
+                }
+                else
+                {
+                    uint32* const Ranks1 = GetRanks1();
+
+                    for(uint32 i=0;i<nb;i++)
+                    {
+                        uint32 Radix = input[Ranks1[i]]>>24; // Radix byte, same as above. AND is useless here (uint32).
+                        // ### cmp to be killed. Not good. Later.
+                        if(Radix<128) *mLink[Radix]++ = Ranks1[i]; // Number is positive, same as above
+                        else *(--mLink[Radix]) = Ranks1[i]; // Number is negative, flip the sorting order
+                    }
+                }
+                // Swap pointers for next pass. Valid indices - the most recent ones - are in mRanks after the swap.
+                SwapRanks();
+            }
+            else
+            {
+                // The pass is useless, yet we still have to reverse the order of current list if all values are negative.
+                if(UniqueVal>=128)
+                {
+                    if (!AreRanksValid())
+                    {
+                        uint32* const Ranks2 = GetRanks2();
+                        // ###Possible?
+                        for(uint32 i=0;i<nb;i++)
+                        {
+                            Ranks2[i] = nb-i-1;
+                        }
+
+                        ValidateRanks();
+                    }
+                    else
+                    {
+                        uint32* const Ranks1 = GetRanks1();
+                        uint32* const Ranks2 = GetRanks2();
+                        for(uint32 i=0;i<nb;i++) Ranks2[i] = Ranks1[nb-i-1];
+                    }
+
+                    // Swap pointers for next pass. Valid indices - the most recent ones - are in mRanks after the swap.
+                    SwapRanks();
+                }
+            }
+        }
+    }
+
+    // Return indices
+    uint32* const Ranks1 = GetRanks1();
+    return Ranks1;
+}
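
RadixSort() above keeps raw IEEE-754 bits and repairs the sign in the final pass with biased offsets. To see why a fix-up is needed at all, here is a hedged sketch of a common alternative scheme (not OPCODE's): remap each float's bits to an order-preserving unsigned key by flipping all bits of negatives and only the sign bit of non-negatives; unsigned comparison of the keys then matches float comparison, negatives included.

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <cstring>

static std::uint32_t sortableBits( float f )
{
    std::uint32_t u;
    std::memcpy( &u, &f, sizeof u );  // well-defined type pun
    return ( u & 0x80000000u ) ? ~u : ( u | 0x80000000u );
}

int main()
{
    float vals[] = { 2.5f, -1.0f, 0.0f, -3.5f, 1.0f };
    std::sort( vals, vals + 5, []( float a, float b ) {
        return sortableBits( a ) < sortableBits( b ); // unsigned order == float order
    } );
    for ( float v : vals ) std::printf( "%g ", v );   // prints: -3.5 -1 0 1 2.5
    std::printf( "\n" );
    return 0;
}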