summaryrefslogtreecommitdiff
path: root/libs/assimp/code/PostProcessing/ImproveCacheLocality.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'libs/assimp/code/PostProcessing/ImproveCacheLocality.cpp')
-rw-r--r--libs/assimp/code/PostProcessing/ImproveCacheLocality.cpp379
1 files changed, 379 insertions, 0 deletions
diff --git a/libs/assimp/code/PostProcessing/ImproveCacheLocality.cpp b/libs/assimp/code/PostProcessing/ImproveCacheLocality.cpp
new file mode 100644
index 0000000..56bdfc4
--- /dev/null
+++ b/libs/assimp/code/PostProcessing/ImproveCacheLocality.cpp
@@ -0,0 +1,379 @@
+/*
+---------------------------------------------------------------------------
+Open Asset Import Library (assimp)
+---------------------------------------------------------------------------
+
+Copyright (c) 2006-2022, assimp team
+
+
+
+All rights reserved.
+
+Redistribution and use of this software in source and binary forms,
+with or without modification, are permitted provided that the following
+conditions are met:
+
+* Redistributions of source code must retain the above
+ copyright notice, this list of conditions and the
+ following disclaimer.
+
+* Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the
+ following disclaimer in the documentation and/or other
+ materials provided with the distribution.
+
+* Neither the name of the assimp team, nor the names of its
+ contributors may be used to endorse or promote products
+ derived from this software without specific prior
+ written permission of the assimp team.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+---------------------------------------------------------------------------
+*/
+
+/** @file Implementation of the post processing step to improve the cache locality of a mesh.
+ * <br>
+ * The algorithm is roughly basing on this paper:
+ * http://www.cs.princeton.edu/gfx/pubs/Sander_2007_%3ETR/tipsy.pdf
+ * .. although overdraw reduction isn't implemented yet ...
+ */
+
+// internal headers
+#include "PostProcessing/ImproveCacheLocality.h"
+#include "Common/VertexTriangleAdjacency.h"
+
+#include <assimp/StringUtils.h>
+#include <assimp/postprocess.h>
+#include <assimp/scene.h>
+#include <assimp/DefaultLogger.hpp>
+#include <stdio.h>
+#include <stack>
+
+using namespace Assimp;
+
+// ------------------------------------------------------------------------------------------------
+// Constructor to be privately used by Importer
+ImproveCacheLocalityProcess::ImproveCacheLocalityProcess()
+: mConfigCacheDepth(PP_ICL_PTCACHE_SIZE) {
+ // empty
+}
+
+// ------------------------------------------------------------------------------------------------
+// Destructor, private as well
+ImproveCacheLocalityProcess::~ImproveCacheLocalityProcess() {
+ // nothing to do here
+}
+
+// ------------------------------------------------------------------------------------------------
+// Returns whether the processing step is present in the given flag field.
+bool ImproveCacheLocalityProcess::IsActive( unsigned int pFlags) const {
+ return (pFlags & aiProcess_ImproveCacheLocality) != 0;
+}
+
+// ------------------------------------------------------------------------------------------------
+// Setup configuration
+void ImproveCacheLocalityProcess::SetupProperties(const Importer* pImp) {
+ // AI_CONFIG_PP_ICL_PTCACHE_SIZE controls the target cache size for the optimizer
+ mConfigCacheDepth = pImp->GetPropertyInteger(AI_CONFIG_PP_ICL_PTCACHE_SIZE,PP_ICL_PTCACHE_SIZE);
+}
+
+// ------------------------------------------------------------------------------------------------
+// Executes the post processing step on the given imported data.
+void ImproveCacheLocalityProcess::Execute( aiScene* pScene) {
+ if (!pScene->mNumMeshes) {
+ ASSIMP_LOG_DEBUG("ImproveCacheLocalityProcess skipped; there are no meshes");
+ return;
+ }
+
+ ASSIMP_LOG_DEBUG("ImproveCacheLocalityProcess begin");
+
+ float out = 0.f;
+ unsigned int numf = 0, numm = 0;
+ for( unsigned int a = 0; a < pScene->mNumMeshes; ++a ){
+ const float res = ProcessMesh( pScene->mMeshes[a],a);
+ if (res) {
+ numf += pScene->mMeshes[a]->mNumFaces;
+ out += res;
+ ++numm;
+ }
+ }
+ if (!DefaultLogger::isNullLogger()) {
+ if (numf > 0) {
+ ASSIMP_LOG_INFO("Cache relevant are ", numm, " meshes (", numf, " faces). Average output ACMR is ", out / numf);
+ }
+ ASSIMP_LOG_DEBUG("ImproveCacheLocalityProcess finished. ");
+ }
+}
+
+// ------------------------------------------------------------------------------------------------
+// Improves the cache coherency of a specific mesh
+ai_real ImproveCacheLocalityProcess::ProcessMesh( aiMesh* pMesh, unsigned int meshNum) {
+ // TODO: rewrite this to use std::vector or boost::shared_array
+ ai_assert(nullptr != pMesh);
+
+ // Check whether the input data is valid
+ // - there must be vertices and faces
+ // - all faces must be triangulated or we can't operate on them
+ if (!pMesh->HasFaces() || !pMesh->HasPositions())
+ return static_cast<ai_real>(0.f);
+
+ if (pMesh->mPrimitiveTypes != aiPrimitiveType_TRIANGLE) {
+ ASSIMP_LOG_ERROR("This algorithm works on triangle meshes only");
+ return static_cast<ai_real>(0.f);
+ }
+
+ if(pMesh->mNumVertices <= mConfigCacheDepth) {
+ return static_cast<ai_real>(0.f);
+ }
+
+ ai_real fACMR = 3.f;
+ const aiFace* const pcEnd = pMesh->mFaces+pMesh->mNumFaces;
+
+ // Input ACMR is for logging purposes only
+ if (!DefaultLogger::isNullLogger()) {
+
+ unsigned int* piFIFOStack = new unsigned int[mConfigCacheDepth];
+ memset(piFIFOStack,0xff,mConfigCacheDepth*sizeof(unsigned int));
+ unsigned int* piCur = piFIFOStack;
+ const unsigned int* const piCurEnd = piFIFOStack + mConfigCacheDepth;
+
+ // count the number of cache misses
+ unsigned int iCacheMisses = 0;
+ for (const aiFace* pcFace = pMesh->mFaces;pcFace != pcEnd;++pcFace) {
+ for (unsigned int qq = 0; qq < 3;++qq) {
+ bool bInCache = false;
+ for (unsigned int* pp = piFIFOStack;pp < piCurEnd;++pp) {
+ if (*pp == pcFace->mIndices[qq]) {
+ // the vertex is in cache
+ bInCache = true;
+ break;
+ }
+ }
+ if (!bInCache) {
+ ++iCacheMisses;
+ if (piCurEnd == piCur) {
+ piCur = piFIFOStack;
+ }
+ *piCur++ = pcFace->mIndices[qq];
+ }
+ }
+ }
+ delete[] piFIFOStack;
+ fACMR = (ai_real) iCacheMisses / pMesh->mNumFaces;
+ if (3.0 == fACMR) {
+ char szBuff[128]; // should be sufficiently large in every case
+
+ // the JoinIdenticalVertices process has not been executed on this
+ // mesh, otherwise this value would normally be at least minimally
+ // smaller than 3.0 ...
+ ai_snprintf(szBuff,128,"Mesh %u: Not suitable for vcache optimization",meshNum);
+ ASSIMP_LOG_WARN(szBuff);
+ return static_cast<ai_real>(0.f);
+ }
+ }
+
+ // first we need to build a vertex-triangle adjacency list
+ VertexTriangleAdjacency adj(pMesh->mFaces,pMesh->mNumFaces, pMesh->mNumVertices,true);
+
+ // build a list to store per-vertex caching time stamps
+ unsigned int* const piCachingStamps = new unsigned int[pMesh->mNumVertices];
+ memset(piCachingStamps,0x0,pMesh->mNumVertices*sizeof(unsigned int));
+
+ // allocate an empty output index buffer. We store the output indices in one large array.
+ // Since the number of triangles won't change the input faces can be reused. This is how
+ // we save thousands of redundant mini allocations for aiFace::mIndices
+ const unsigned int iIdxCnt = pMesh->mNumFaces*3;
+ unsigned int* const piIBOutput = new unsigned int[iIdxCnt];
+ unsigned int* piCSIter = piIBOutput;
+
+ // allocate the flag array to hold the information
+ // whether a face has already been emitted or not
+ std::vector<bool> abEmitted(pMesh->mNumFaces,false);
+
+ // dead-end vertex index stack
+ std::stack<unsigned int, std::vector<unsigned int> > sDeadEndVStack;
+
+ // create a copy of the piNumTriPtr buffer
+ unsigned int* const piNumTriPtr = adj.mLiveTriangles;
+ const std::vector<unsigned int> piNumTriPtrNoModify(piNumTriPtr, piNumTriPtr + pMesh->mNumVertices);
+
+ // get the largest number of referenced triangles and allocate the "candidate buffer"
+ unsigned int iMaxRefTris = 0; {
+ const unsigned int* piCur = adj.mLiveTriangles;
+ const unsigned int* const piCurEnd = adj.mLiveTriangles+pMesh->mNumVertices;
+ for (;piCur != piCurEnd;++piCur) {
+ iMaxRefTris = std::max(iMaxRefTris,*piCur);
+ }
+ }
+ ai_assert(iMaxRefTris > 0);
+ unsigned int* piCandidates = new unsigned int[iMaxRefTris*3];
+ unsigned int iCacheMisses = 0;
+
+ // ...................................................................................
+ /** PSEUDOCODE for the algorithm
+
+ A = Build-Adjacency(I) Vertex-triangle adjacency
+ L = Get-Triangle-Counts(A) Per-vertex live triangle counts
+ C = Zero(Vertex-Count(I)) Per-vertex caching time stamps
+ D = Empty-Stack() Dead-end vertex stack
+ E = False(Triangle-Count(I)) Per triangle emitted flag
+ O = Empty-Index-Buffer() Empty output buffer
+ f = 0 Arbitrary starting vertex
+ s = k+1, i = 1 Time stamp and cursor
+ while f >= 0 For all valid fanning vertices
+ N = Empty-Set() 1-ring of next candidates
+ for each Triangle t in Neighbors(A, f)
+ if !Emitted(E,t)
+ for each Vertex v in t
+ Append(O,v) Output vertex
+ Push(D,v) Add to dead-end stack
+ Insert(N,v) Register as candidate
+ L[v] = L[v]-1 Decrease live triangle count
+ if s-C[v] > k If not in cache
+ C[v] = s Set time stamp
+ s = s+1 Increment time stamp
+ E[t] = true Flag triangle as emitted
+ Select next fanning vertex
+ f = Get-Next-Vertex(I,i,k,N,C,s,L,D)
+ return O
+ */
+ // ...................................................................................
+
+ int ivdx = 0;
+ int ics = 1;
+ int iStampCnt = mConfigCacheDepth+1;
+ while (ivdx >= 0) {
+
+ unsigned int icnt = piNumTriPtrNoModify[ivdx];
+ unsigned int* piList = adj.GetAdjacentTriangles(ivdx);
+ unsigned int* piCurCandidate = piCandidates;
+
+ // get all triangles in the neighborhood
+ for (unsigned int tri = 0; tri < icnt;++tri) {
+
+ // if they have not yet been emitted, add them to the output IB
+ const unsigned int fidx = *piList++;
+ if (!abEmitted[fidx]) {
+
+ // so iterate through all vertices of the current triangle
+ const aiFace* pcFace = &pMesh->mFaces[ fidx ];
+ unsigned nind = pcFace->mNumIndices;
+ for (unsigned ind = 0; ind < nind; ind++) {
+ unsigned dp = pcFace->mIndices[ind];
+
+ // the current vertex won't have any free triangles after this step
+ if (ivdx != (int)dp) {
+ // append the vertex to the dead-end stack
+ sDeadEndVStack.push(dp);
+
+ // register as candidate for the next step
+ *piCurCandidate++ = dp;
+
+ // decrease the per-vertex triangle counts
+ piNumTriPtr[dp]--;
+ }
+
+ // append the vertex to the output index buffer
+ *piCSIter++ = dp;
+
+ // if the vertex is not yet in cache, set its cache count
+ if (iStampCnt-piCachingStamps[dp] > mConfigCacheDepth) {
+ piCachingStamps[dp] = iStampCnt++;
+ ++iCacheMisses;
+ }
+ }
+ // flag triangle as emitted
+ abEmitted[fidx] = true;
+ }
+ }
+
+ // the vertex has now no living adjacent triangles anymore
+ piNumTriPtr[ivdx] = 0;
+
+ // get next fanning vertex
+ ivdx = -1;
+ int max_priority = -1;
+ for (unsigned int* piCur = piCandidates;piCur != piCurCandidate;++piCur) {
+ const unsigned int dp = *piCur;
+
+ // must have live triangles
+ if (piNumTriPtr[dp] > 0) {
+ int priority = 0;
+
+ // will the vertex be in cache, even after fanning occurs?
+ unsigned int tmp;
+ if ((tmp = iStampCnt-piCachingStamps[dp]) + 2*piNumTriPtr[dp] <= mConfigCacheDepth) {
+ priority = tmp;
+ }
+
+ // keep best candidate
+ if (priority > max_priority) {
+ max_priority = priority;
+ ivdx = dp;
+ }
+ }
+ }
+ // did we reach a dead end?
+ if (-1 == ivdx) {
+ // need to get a non-local vertex for which we have a good chance that it is still
+ // in the cache ...
+ while (!sDeadEndVStack.empty()) {
+ unsigned int iCachedIdx = sDeadEndVStack.top();
+ sDeadEndVStack.pop();
+ if (piNumTriPtr[ iCachedIdx ] > 0) {
+ ivdx = iCachedIdx;
+ break;
+ }
+ }
+
+ if (-1 == ivdx) {
+ // well, there isn't such a vertex. Simply get the next vertex in input order and
+ // hope it is not too bad ...
+ while (ics < (int)pMesh->mNumVertices) {
+ ++ics;
+ if (piNumTriPtr[ics] > 0) {
+ ivdx = ics;
+ break;
+ }
+ }
+ }
+ }
+ }
+ ai_real fACMR2 = 0.0f;
+ if (!DefaultLogger::isNullLogger()) {
+ fACMR2 = (float)iCacheMisses / pMesh->mNumFaces;
+
+ // very intense verbose logging ... prepare for much text if there are many meshes
+ if ( DefaultLogger::get()->getLogSeverity() == Logger::VERBOSE) {
+ ASSIMP_LOG_VERBOSE_DEBUG("Mesh %u | ACMR in: ", meshNum, " out: ", fACMR, " | ~", fACMR2, ((fACMR - fACMR2) / fACMR) * 100.f);
+ }
+
+ fACMR2 *= pMesh->mNumFaces;
+ }
+ // sort the output index buffer back to the input array
+ piCSIter = piIBOutput;
+ for (aiFace* pcFace = pMesh->mFaces; pcFace != pcEnd;++pcFace) {
+ unsigned nind = pcFace->mNumIndices;
+ unsigned * ind = pcFace->mIndices;
+ if (nind > 0) ind[0] = *piCSIter++;
+ if (nind > 1) ind[1] = *piCSIter++;
+ if (nind > 2) ind[2] = *piCSIter++;
+ }
+
+ // delete temporary storage
+ delete[] piCachingStamps;
+ delete[] piIBOutput;
+ delete[] piCandidates;
+
+ return fACMR2;
+}