summaryrefslogtreecommitdiff
path: root/libs/ode-0.16.1/ode/src/fastltsolve.cpp
diff options
context:
space:
mode:
author sanine <sanine.not@pm.me> 2022-10-01 20:59:36 -0500
committer sanine <sanine.not@pm.me> 2022-10-01 20:59:36 -0500
commit c5fc66ee58f2c60f2d226868bb1cf5b91badaf53 (patch)
tree 277dd280daf10bf77013236b8edfa5f88708c7e0 /libs/ode-0.16.1/ode/src/fastltsolve.cpp
parent 1cf9cc3408af7008451f9133fb95af66a9697d15 (diff)
add ode
Diffstat (limited to 'libs/ode-0.16.1/ode/src/fastltsolve.cpp')
-rw-r--r--libs/ode-0.16.1/ode/src/fastltsolve.cpp229
1 files changed, 229 insertions, 0 deletions
diff --git a/libs/ode-0.16.1/ode/src/fastltsolve.cpp b/libs/ode-0.16.1/ode/src/fastltsolve.cpp
new file mode 100644
index 0000000..e9c7ec5
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/fastltsolve.cpp
@@ -0,0 +1,229 @@
+/*************************************************************************
+ * *
+ * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith. *
+ * All rights reserved. Email: russ@q12.org Web: www.q12.org *
+ * *
+ * This library is free software; you can redistribute it and/or *
+ * modify it under the terms of EITHER: *
+ * (1) The GNU Lesser General Public License as published by the Free *
+ * Software Foundation; either version 2.1 of the License, or (at *
+ * your option) any later version. The text of the GNU Lesser *
+ * General Public License is included with this library in the *
+ * file LICENSE.TXT. *
+ * (2) The BSD-style license that is included with this library in *
+ * the file LICENSE-BSD.TXT. *
+ * *
+ * This library is distributed in the hope that it will be useful, *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details. *
+ * *
+ *************************************************************************/
+
+/*
+ * L1Transposed Equation Solving Routines
+ * Copyright (c) 2017-2019 Oleh Derevenko, odar@eleks.com (change all "a" to "e")
+ */
+
+#include <ode/common.h>
+#include <ode/matrix.h>
+#include <ode/matrix_coop.h>
+#include "config.h"
+#include "threaded_solver_ldlt.h"
+#include "threading_base.h"
+#include "resource_control.h"
+#include "error.h"
+
+#include "fastltsolve_impl.h"
+
+
+// Merges the resource requirements of a cooperative L1-transposed solve of
+// rowCount rows into summaryRequirementsDescriptor.
+//
+// The requested allowedThreadCount is first restricted with
+// restrictSolvingL1TransposedAllowedThreadCount() against the threading object
+// related to the descriptor. Nothing is merged when the restricted count is 1
+// or less, which is the same condition under which
+// cooperativelySolveL1Transposed() takes its single-threaded path.
+/*static */
+void ThreadedEquationSolverLDLT::estimateCooperativeSolvingL1TransposedResourceRequirements(
+    dxResourceRequirementDescriptor *summaryRequirementsDescriptor,
+    unsigned allowedThreadCount, unsigned rowCount)
+{
+    dxThreadingBase *threading = summaryRequirementsDescriptor->getrelatedThreading();
+    unsigned limitedThreadCount = restrictSolvingL1TransposedAllowedThreadCount(threading, allowedThreadCount, rowCount);
+
+    if (limitedThreadCount > 1)
+    {
+        // Forward the restricted count rather than the raw request, mirroring
+        // cooperativelySolveL1Transposed(). The validated helper derives its
+        // thread count from this argument and asserts that it exceeds one;
+        // only limitedThreadCount has been checked against that here.
+        // NOTE(review): the raw request may presumably be 0 ("no limit") or
+        // larger than the available pool - confirm against
+        // calculateThreadingLimitedThreadCount().
+        doEstimateCooperativeSolvingL1TransposedResourceRequirementsValidated(summaryRequirementsDescriptor, limitedThreadCount, rowCount);
+    }
+}
+
+// Solves the L1-transposed system for b, using several threads when that is
+// permitted and a single-threaded solve otherwise.
+//
+// rowCount must be non-zero (checked with dIASSERT). The requested
+// allowedThreadCount is restricted against the container's threading instance;
+// the cooperative path is taken only if more than one thread remains.
+/*static */
+void ThreadedEquationSolverLDLT::cooperativelySolveL1Transposed(
+    dxRequiredResourceContainer *resourceContainer, unsigned allowedThreadCount,
+    const dReal *L, dReal *b, unsigned rowCount, unsigned rowSkip)
+{
+    dIASSERT(rowCount != 0);
+
+    const unsigned usableThreadCount = restrictSolvingL1TransposedAllowedThreadCount(
+        resourceContainer->getThreadingInstance(), allowedThreadCount, rowCount);
+
+    if (usableThreadCount > 1)
+    {
+        // Enough threads are available: hand over to the cooperative implementation.
+        doCooperativelySolveL1TransposedValidated(resourceContainer, usableThreadCount, L, b, rowCount, rowSkip);
+        return;
+    }
+
+    // Fall back to the plain solve on the calling thread.
+    solveL1Transposed<SL1T_B_STRIDE>(L, b, rowCount, rowSkip);
+}
+
+
+// Returns the thread count a solve of rowCount rows may actually use.
+//
+// The result is 1 when cooperative code is compiled out (dCOOPERATIVE_ENABLED
+// is false) or when the row count yields fewer than
+// SL1T_COOPERATIVE_BLOCK_COUNT_MINIMUM blocks. Otherwise it is whatever
+// threading->calculateThreadingLimitedThreadCount(allowedThreadCount, true)
+// reports.
+/*static */
+unsigned ThreadedEquationSolverLDLT::restrictSolvingL1TransposedAllowedThreadCount(
+    dxThreadingBase *threading, unsigned allowedThreadCount, unsigned rowCount)
+{
+#if dCOOPERATIVE_ENABLED
+    const unsigned int restrictionBlockStep = SL1T_BLOCK_SIZE; // Required by the implementation
+    unsigned solvingBlockCount = deriveSolvingL1TransposedBlockCount(rowCount, restrictionBlockStep);
+    // Sanity check: the minimal cooperative block count must allow at least two threads.
+    dIASSERT(deriveSolvingL1TransposedThreadCount(SL1T_COOPERATIVE_BLOCK_COUNT_MINIMUM, 2) > 1);
+
+    if (solvingBlockCount >= SL1T_COOPERATIVE_BLOCK_COUNT_MINIMUM)
+    {
+        return threading->calculateThreadingLimitedThreadCount(allowedThreadCount, true);
+    }
+#endif // #if dCOOPERATIVE_ENABLED
+
+    // Too little work (or no cooperative support): single thread only.
+    return 1;
+}
+
+// Computes what a cooperative solve of rowCount rows needs - scratch memory
+// size and alignment, number of simultaneous calls, and the stock call wait
+// feature - and merges it into summaryRequirementsDescriptor.
+//
+// The caller is expected to have validated the thread count already: the
+// thread count derived here is asserted to exceed one.
+/*static */
+void ThreadedEquationSolverLDLT::doEstimateCooperativeSolvingL1TransposedResourceRequirementsValidated(
+    dxResourceRequirementDescriptor *summaryRequirementsDescriptor,
+    unsigned allowedThreadCount, unsigned rowCount)
+{
+    const unsigned int estimationBlockStep = SL1T_BLOCK_SIZE; // Required by the implementation
+    unsigned blockCount = deriveSolvingL1TransposedBlockCount(rowCount, estimationBlockStep);
+    dIASSERT(blockCount >= 1);
+
+    unsigned threadCountToUse = deriveSolvingL1TransposedThreadCount(blockCount, allowedThreadCount);
+    dIASSERT(threadCountToUse > 1);
+
+    // Memory side of the requirement.
+    SolvingL1TransposedMemoryEstimates memoryEstimates;
+    sizeint memorySizeNeeded = estimateCooperativelySolvingL1TransposedMemoryRequirement<estimationBlockStep>(rowCount, memoryEstimates);
+    const unsigned memoryAlignmentNeeded = ALLOCATION_DEFAULT_ALIGNMENT;
+
+    // Threading side of the requirement.
+    // NOTE(review): presumably one completion call plus (threadCountToUse - 1)
+    // worker calls, matching what doCooperativelySolveL1TransposedValidated() posts.
+    unsigned callCountNeeded = 1 + (threadCountToUse - 1);
+    unsigned featuresNeeded = dxResourceRequirementDescriptor::STOCK_CALLWAIT_REQUIRED;
+
+    summaryRequirementsDescriptor->mergeAnotherDescriptorIn(
+        memorySizeNeeded, memoryAlignmentNeeded, callCountNeeded, featuresNeeded);
+}
+
+// Performs the cooperative solve. It lays the working structures out in the
+// container's memory buffer, posts a completion call and a group of worker
+// calls, takes part in the solving on the calling thread, and finally waits
+// on the stock call wait.
+//
+// allowedThreadCount must already be validated to be greater than one
+// (asserted). The resource container is expected to provide a stock call wait
+// and a buffer at least as large as the estimated memory requirement.
+/*static */
+void ThreadedEquationSolverLDLT::doCooperativelySolveL1TransposedValidated(
+    dxRequiredResourceContainer *resourceContainer, unsigned allowedThreadCount,
+    const dReal *L, dReal *b, unsigned rowCount, unsigned rowSkip)
+{
+    dIASSERT(allowedThreadCount > 1);
+
+    const unsigned int solvingBlockStep = SL1T_BLOCK_SIZE; // Required by the implementation
+    unsigned blockCount = deriveSolvingL1TransposedBlockCount(rowCount, solvingBlockStep);
+    dIASSERT(blockCount >= 1);
+
+    unsigned threadCountToUse = deriveSolvingL1TransposedThreadCount(blockCount, allowedThreadCount);
+    dIASSERT(threadCountToUse > 1);
+
+    dCallWaitID completionWait = resourceContainer->getStockCallWait();
+    dAASSERT(completionWait != NULL);
+
+    // Check that the pre-allocated buffer is big enough and properly aligned.
+    SolvingL1TransposedMemoryEstimates memoryEstimates;
+    sizeint solvingMemoryRequired = estimateCooperativelySolvingL1TransposedMemoryRequirement<solvingBlockStep>(rowCount, memoryEstimates);
+    dIASSERT(solvingMemoryRequired <= resourceContainer->getMemoryBufferSize());
+
+    void *bufferAllocated = resourceContainer->getMemoryBufferPointer();
+    dIASSERT(bufferAllocated != NULL);
+    dIASSERT(dALIGN_PTR(bufferAllocated, ALLOCATION_DEFAULT_ALIGNMENT) == bufferAllocated);
+
+    // Carve the progress descriptors and cell contexts out of the buffer.
+    atomicord32 completionProgress;
+    cellindexint *progressDescriptors;
+    SolveL1TransposedCellContext *cellContextArray;
+
+    void *bufferCurrentLocation = bufferAllocated;
+    bufferCurrentLocation = markCooperativelySolvingL1TransposedMemoryStructuresOut(
+        bufferCurrentLocation, memoryEstimates, progressDescriptors, cellContextArray);
+    dIVERIFY(bufferCurrentLocation <= (uint8 *)bufferAllocated + solvingMemoryRequired);
+
+    initializeCooperativelySolveL1TransposedMemoryStructures<solvingBlockStep>(
+        rowCount, completionProgress, progressDescriptors, cellContextArray);
+
+    dCallReleaseeID finishReleasee;
+    SolveL1TransposedWorkerContext sharedWorkerContext; // The variable must exist in the outer scope
+
+    sharedWorkerContext.init(L, b, rowCount, rowSkip, completionProgress, progressDescriptors, cellContextArray);
+
+    // Every thread except the calling one gets a posted worker call.
+    const unsigned postedWorkerCount = threadCountToUse - 1;
+
+    dxThreadingBase *threadingInstance = resourceContainer->getThreadingInstance();
+    // The completion call is posted first with postedWorkerCount as its count
+    // argument; the worker group is then posted against its releasee.
+    // NOTE(review): presumably each finished worker releases the completion
+    // call once - confirm against the threading API.
+    threadingInstance->PostThreadedCall(
+        NULL, &finishReleasee, postedWorkerCount, NULL, completionWait,
+        &solveL1Transposed_completion_callback, NULL, 0, "SolveL1Transposed Completion");
+    threadingInstance->PostThreadedCallsGroup(
+        NULL, postedWorkerCount, finishReleasee,
+        &solveL1Transposed_worker_callback, &sharedWorkerContext, "SolveL1Transposed Work");
+
+    // The calling thread participates too, using the last thread index.
+    participateSolvingL1Transposed<solvingBlockStep, SL1T_B_STRIDE>(
+        L, b, rowCount, rowSkip, completionProgress, progressDescriptors, cellContextArray, postedWorkerCount);
+
+    // Do not return (and destroy the shared context) before the posted calls are done.
+    threadingInstance->WaitThreadedCallExclusively(NULL, completionWait, NULL, "SolveL1Transposed End Wait");
+}
+
+// Threaded-call entry point for a solving worker.
+//
+// callContext is the SolveL1TransposedWorkerContext that was posted with the
+// call group; callInstanceIndex is narrowed to unsigned and passed on as the
+// worker's own thread index. Always returns 1.
+/*static */
+int ThreadedEquationSolverLDLT::solveL1Transposed_worker_callback(
+    void *callContext, dcallindex_t callInstanceIndex, dCallReleaseeID dUNUSED(callThisReleasee))
+{
+    SolveL1TransposedWorkerContext &workerContext = *(SolveL1TransposedWorkerContext *)callContext;
+    const unsigned workerIndex = dCAST_TO_SMALLER(unsigned, callInstanceIndex);
+
+    solveL1Transposed_worker(workerContext, workerIndex);
+
+    return 1;
+}
+
+// Worker body: unpacks the shared context and joins the cooperative solve as
+// the thread with index ownThreadIndex.
+/*static */
+void ThreadedEquationSolverLDLT::solveL1Transposed_worker(SolveL1TransposedWorkerContext &ref_context, unsigned ownThreadIndex)
+{
+    const unsigned workerBlockStep = SL1T_BLOCK_SIZE;
+
+    participateSolvingL1Transposed<workerBlockStep, SL1T_B_STRIDE>(
+        ref_context.m_L,
+        ref_context.m_b,
+        ref_context.m_rowCount,
+        ref_context.m_rowSkip,
+        *ref_context.m_ptrBlockCompletionProgress,
+        ref_context.m_blockProgressDescriptors,
+        ref_context.m_cellContexts,
+        ownThreadIndex);
+}
+
+// Completion call posted by doCooperativelySolveL1TransposedValidated().
+// It does no work of its own and ignores all of its arguments; it always
+// returns 1.
+/*static */
+int ThreadedEquationSolverLDLT::solveL1Transposed_completion_callback(
+    void *dUNUSED(callContext),
+    dcallindex_t dUNUSED(callInstanceIndex),
+    dCallReleaseeID dUNUSED(callThisReleasee))
+{
+    return 1;
+}
+
+
+
+//////////////////////////////////////////////////////////////////////////
+// Public interface functions
+
+// Public single-threaded entry point: runs solveL1Transposed with a b-stride
+// of 1 on B.
+//
+// A zero rowCount trips an argument assertion and, where assertions are
+// compiled out, makes the call a no-op. L and B must be non-NULL (asserted).
+/*extern ODE_API */
+void dSolveL1T(const dReal *L, dReal *B, int rowCount, int rowSkip)
+{
+    dAASSERT(rowCount != 0);
+
+    if (rowCount == 0)
+    {
+        // Nothing to solve.
+        return;
+    }
+
+    dAASSERT(L != NULL);
+    dAASSERT(B != NULL);
+
+    solveL1Transposed<1>(L, B, rowCount, rowSkip);
+}
+
+
+// Public wrapper: converts the opaque requirements handle to its descriptor
+// type and forwards to
+// ThreadedEquationSolverLDLT::estimateCooperativeSolvingL1TransposedResourceRequirements().
+// requirements must be non-NULL (asserted).
+/*extern ODE_API */
+void dEstimateCooperativelySolveL1TransposedResourceRequirements(dResourceRequirementsID requirements,
+    unsigned maximalAllowedThreadCount, unsigned maximalRowCount)
+{
+    dAASSERT(requirements != NULL);
+
+    dxResourceRequirementDescriptor *descriptor = (dxResourceRequirementDescriptor *)requirements;
+
+    ThreadedEquationSolverLDLT::estimateCooperativeSolvingL1TransposedResourceRequirements(
+        descriptor, maximalAllowedThreadCount, maximalRowCount);
+}
+
+// Public wrapper: converts the opaque resources handle to its container type
+// and forwards to ThreadedEquationSolverLDLT::cooperativelySolveL1Transposed().
+// resources must be non-NULL (asserted).
+/*extern ODE_API */
+void dCooperativelySolveL1Transposed(dResourceContainerID resources, unsigned allowedThreadCount,
+    const dReal *L, dReal *b, unsigned rowCount, unsigned rowSkip)
+{
+    dAASSERT(resources != NULL);
+
+    dxRequiredResourceContainer *container = (dxRequiredResourceContainer *)resources;
+
+    ThreadedEquationSolverLDLT::cooperativelySolveL1Transposed(
+        container, allowedThreadCount, L, b, rowCount, rowSkip);
+}
+