summaryrefslogtreecommitdiff
path: root/libs/ode-0.16.1/ode/src/fastldltsolve.cpp
diff options
context:
space:
mode:
authorsanine <sanine.not@pm.me>2022-10-01 20:59:36 -0500
committersanine <sanine.not@pm.me>2022-10-01 20:59:36 -0500
commitc5fc66ee58f2c60f2d226868bb1cf5b91badaf53 (patch)
tree277dd280daf10bf77013236b8edfa5f88708c7e0 /libs/ode-0.16.1/ode/src/fastldltsolve.cpp
parent1cf9cc3408af7008451f9133fb95af66a9697d15 (diff)
add ode
Diffstat (limited to 'libs/ode-0.16.1/ode/src/fastldltsolve.cpp')
-rw-r--r--libs/ode-0.16.1/ode/src/fastldltsolve.cpp222
1 files changed, 222 insertions, 0 deletions
diff --git a/libs/ode-0.16.1/ode/src/fastldltsolve.cpp b/libs/ode-0.16.1/ode/src/fastldltsolve.cpp
new file mode 100644
index 0000000..ca1ff4d
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/fastldltsolve.cpp
@@ -0,0 +1,222 @@
+/*************************************************************************
+ * *
+ * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith. *
+ * All rights reserved. Email: russ@q12.org Web: www.q12.org *
+ * *
+ * This library is free software; you can redistribute it and/or *
+ * modify it under the terms of EITHER: *
+ * (1) The GNU Lesser General Public License as published by the Free *
+ * Software Foundation; either version 2.1 of the License, or (at *
+ * your option) any later version. The text of the GNU Lesser *
+ * General Public License is included with this library in the *
+ * file LICENSE.TXT. *
+ * (2) The BSD-style license that is included with this library in *
+ * the file LICENSE-BSD.TXT. *
+ * *
+ * This library is distributed in the hope that it will be useful, *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details. *
+ * *
+ *************************************************************************/
+
+/*
+ * LDLT solving related code of ThreadedEquationSolverLDLT
+ * Copyright (c) 2017-2019 Oleh Derevenko, odar@eleks.com (change all "a" to "e")
+ */
+
+#include <ode/common.h>
+#include <ode/matrix.h>
+#include <ode/matrix_coop.h>
+#include "config.h"
+#include "threaded_solver_ldlt.h"
+#include "threading_base.h"
+#include "resource_control.h"
+
+#include "fastldltsolve_impl.h"
+
+
+/*static */
+void ThreadedEquationSolverLDLT::estimateCooperativeSolvingLDLTResourceRequirements(
+ dxResourceRequirementDescriptor *summaryRequirementsDescriptor,
+ unsigned allowedThreadCount, unsigned rowCount)
+{
+ unsigned stageBlockCountSifficiencyMask;
+ dxThreadingBase *threading = summaryRequirementsDescriptor->getrelatedThreading();
+ unsigned limitedThreadCount = restrictSolvingLDLTAllowedThreadCount(threading, allowedThreadCount, rowCount, stageBlockCountSifficiencyMask);
+
+ if (limitedThreadCount > 1)
+ {
+ if ((stageBlockCountSifficiencyMask & (1U << SLDLTS_SOLVING_STRAIGHT)) != 0)
+ {
+ doEstimateCooperativeSolvingL1StraightResourceRequirementsValidated(summaryRequirementsDescriptor, allowedThreadCount, rowCount);
+ }
+
+ if ((stageBlockCountSifficiencyMask & (1U << SLDLTS_SCALING_VECTOR)) != 0)
+ {
+ doEstimateCooperativeScalingVectorResourceRequirementsValidated(summaryRequirementsDescriptor, allowedThreadCount, rowCount);
+ }
+
+ if ((stageBlockCountSifficiencyMask & (1U << SLDLTS_SOLVING_TRANSPOSED)) == 0)
+ {
+ doEstimateCooperativeSolvingL1TransposedResourceRequirementsValidated(summaryRequirementsDescriptor, allowedThreadCount, rowCount);
+ }
+ }
+}
+
+/*static */
+void ThreadedEquationSolverLDLT::cooperativelySolveLDLT(
+ dxRequiredResourceContainer *resourceContainer, unsigned allowedThreadCount,
+ const dReal *L, const dReal *d, dReal *b, unsigned rowCount, unsigned rowSkip)
+{
+ dAASSERT(rowCount != 0);
+
+ unsigned stageBlockCountSifficiencyMask;
+
+ dxThreadingBase *threading = resourceContainer->getThreadingInstance();
+ unsigned limitedThreadCount = restrictSolvingLDLTAllowedThreadCount(threading, allowedThreadCount, rowCount, stageBlockCountSifficiencyMask);
+
+ if (limitedThreadCount <= 1)
+ {
+ solveEquationSystemWithLDLT<SLDLT_D_STRIDE, SLDLT_B_STRIDE>(L, d, b, rowCount, rowSkip);
+ }
+ else
+ {
+ doCooperativelySolveLDLTValidated(resourceContainer, limitedThreadCount, stageBlockCountSifficiencyMask, L, d, b, rowCount, rowSkip);
+ }
+}
+
+/*static */
+unsigned ThreadedEquationSolverLDLT::restrictSolvingLDLTAllowedThreadCount(
+ dxThreadingBase *threading, unsigned allowedThreadCount, unsigned rowCount, unsigned &out_stageBlockCountSifficiencyMask)
+{
+ unsigned limitedThreadCount = 1;
+ unsigned stageBlockCountSifficiencyMask = 0;
+
+#if dCOOPERATIVE_ENABLED
+ {
+ const unsigned int blockStep = SL1S_BLOCK_SIZE; // Required by the implementation
+ unsigned solvingStraightBlockCount = deriveSolvingL1StraightBlockCount(rowCount, blockStep);
+ dIASSERT(deriveSolvingL1StraightThreadCount(SL1S_COOPERATIVE_BLOCK_COUNT_MINIMUM, 2) > 1);
+
+ if (solvingStraightBlockCount >= SL1S_COOPERATIVE_BLOCK_COUNT_MINIMUM)
+ {
+ stageBlockCountSifficiencyMask |= 1U << SLDLTS_SOLVING_STRAIGHT;
+ }
+ }
+
+ {
+ const unsigned int blockStep = SV_BLOCK_SIZE; // Required by the implementation
+ unsigned scalingBlockCount = deriveScalingVectorBlockCount(rowCount, blockStep);
+ dIASSERT(deriveScalingVectorThreadCount(SV_COOPERATIVE_BLOCK_COUNT_MINIMUM - 1, 2) > 1);
+
+ if (scalingBlockCount >= SV_COOPERATIVE_BLOCK_COUNT_MINIMUM)
+ {
+ stageBlockCountSifficiencyMask |= 1U << SLDLTS_SCALING_VECTOR;
+ }
+ }
+
+ {
+ const unsigned int blockStep = SL1T_BLOCK_SIZE; // Required by the implementation
+ unsigned solvingTransposedBlockCount = deriveSolvingL1TransposedBlockCount(rowCount, blockStep);
+ dIASSERT(deriveSolvingL1TransposedThreadCount(SL1T_COOPERATIVE_BLOCK_COUNT_MINIMUM, 2) > 1);
+
+ if (solvingTransposedBlockCount >= SL1T_COOPERATIVE_BLOCK_COUNT_MINIMUM)
+ {
+ stageBlockCountSifficiencyMask |= 1U << SLDLTS_SOLVING_TRANSPOSED;
+ }
+ }
+
+ if (stageBlockCountSifficiencyMask != 0)
+ {
+ limitedThreadCount = threading->calculateThreadingLimitedThreadCount(allowedThreadCount, true);
+ }
+#endif // #if dCOOPERATIVE_ENABLED
+
+ out_stageBlockCountSifficiencyMask = stageBlockCountSifficiencyMask;
+ return limitedThreadCount;
+}
+
+
+/*static */
+void ThreadedEquationSolverLDLT::doCooperativelySolveLDLTValidated(
+ dxRequiredResourceContainer *resourceContainer, unsigned allowedThreadCount, unsigned stageBlockCountSifficiencyMask,
+ const dReal *L, const dReal *d, dReal *b, unsigned rowCount, unsigned rowSkip)
+{
+ dIASSERT(allowedThreadCount > 1);
+
+ if ((stageBlockCountSifficiencyMask & (1U << SLDLTS_SOLVING_STRAIGHT)) == 0)
+ {
+ solveL1Straight<SLDLT_B_STRIDE>(L, b, rowCount, rowSkip);
+ }
+ else
+ {
+ dSASSERT(SLDLT_B_STRIDE + 0 == SL1S_B_STRIDE);
+
+ doCooperativelySolveL1StraightValidated(resourceContainer, allowedThreadCount, L, b, rowCount, rowSkip);
+ }
+
+ if ((stageBlockCountSifficiencyMask & (1U << SLDLTS_SCALING_VECTOR)) == 0)
+ {
+ scaleLargeVector<SLDLT_B_STRIDE, SLDLT_D_STRIDE>(b, d, rowCount);
+ }
+ else
+ {
+ dSASSERT(SLDLT_B_STRIDE + 0 == SV_A_STRIDE);
+ dSASSERT(SLDLT_D_STRIDE + 0 == SV_D_STRIDE);
+
+ doCooperativelyScaleVectorValidated(resourceContainer, allowedThreadCount, b, d, rowCount);
+ }
+
+ if ((stageBlockCountSifficiencyMask & (1U << SLDLTS_SOLVING_TRANSPOSED)) == 0)
+ {
+ solveL1Transposed<SLDLT_B_STRIDE>(L, b, rowCount, rowSkip);
+ }
+ else
+ {
+ dSASSERT(SLDLT_B_STRIDE + 0 == SL1T_B_STRIDE);
+
+ doCooperativelySolveL1TransposedValidated(resourceContainer, allowedThreadCount, L, b, rowCount, rowSkip);
+ }
+}
+
+
+//////////////////////////////////////////////////////////////////////////
+// Public interface functions
+
+/*extern ODE_API */
+void dSolveLDLT(const dReal *L, const dReal *d, dReal *b, int n, int nskip)
+{
+ dAASSERT(n != 0);
+
+ if (n != 0)
+ {
+ dAASSERT(L != NULL);
+ dAASSERT(d != NULL);
+ dAASSERT(b != NULL);
+
+ solveEquationSystemWithLDLT<1, 1>(L, d, b, n, nskip);
+ }
+}
+
+
+/*extern ODE_API */
+void dEstimateCooperativelySolveLDLTResourceRequirements(dResourceRequirementsID requirements,
+ unsigned maximalAllowedThreadCount, unsigned maximalRowCount)
+{
+ dAASSERT(requirements != NULL);
+
+ dxResourceRequirementDescriptor *requirementsDescriptor = (dxResourceRequirementDescriptor *)requirements;
+ ThreadedEquationSolverLDLT::estimateCooperativeSolvingLDLTResourceRequirements(requirementsDescriptor, maximalAllowedThreadCount, maximalRowCount);
+}
+
+/*extern ODE_API */
+void dCooperativelySolveLDLT(dResourceContainerID resources, unsigned allowedThreadCount,
+ const dReal *L, const dReal *d, dReal *b, unsigned rowCount, unsigned rowSkip)
+{
+ dAASSERT(resources != NULL);
+
+ dxRequiredResourceContainer *resourceContainer = (dxRequiredResourceContainer *)resources;
+ ThreadedEquationSolverLDLT::cooperativelySolveLDLT(resourceContainer, allowedThreadCount, L, d, b, rowCount, rowSkip);
+}
+